################################################################################### # This is a modified version of nnscore.py. Contains a new set of neural # # networks. # # # # CHANGES: # # 1. The method containing weights of the neural networks is removed. Instead # # they are now saved into a pickle file (networks.pickle) # # 2. A new deep learning based model is added on experimental basis just to # # test the workflow of combining two types of networks together. The score # # provided by the new network doesn't count towards the overall performance # # of nnscore.py # # # # Necessary files to run: dl_networks_03 [folder] # # # # Last Modified by: Mahmudulla Hassan on 03/13/2018 # ################################################################################### # NNScore 2.01 is released under the GNU General Public License (see http://www.gnu.org/licenses/gpl.html). # If you have any questions, comments, or suggestions, please don't hesitate to contact me, Jacob # Durrant, at jdurrant [at] ucsd [dot] edu. If you use NNScore 2.01 in your work, please cite [REFERENCE HERE]. ################################## MODIFY THIS VARIABLE TO POINT TO THE AUTODOCK VINA EXECUTABLE ################################## vina_executable = "/PATH/TO/VINA_1_1_2/vina" # Example: vina_executable = "/gpfs/autodock_vina_1_1_2_linux_x86/bin/vina" ################################################################################################################################### import numpy as np import textwrap import math import os import sys import textwrap import glob import pickle import csv import re #import cPickle ######## IMPORT MODULES NECESSARY FOR KERAS AND DLSCORE MODEL IMPLEMENTATION ############ import keras from keras import metrics from keras.models import Sequential, model_from_json import keras.backend as K import pickle import h5py # ignore warning from tensorflow os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ########################################################################################## # The ffnet class was derived from the ffnet python package developed by Marek Wojciechowski (http://ffnet.sourceforge.net/). # As Mr. Wojciechowski's program was released under the GPL license, NNScore is likewise GPL licensed. class ffnet: def load(self, net_array): self.outno = net_array['outno'] self.eni = net_array['eni'] self.weights = net_array['weights'] self.conec = net_array['conec'] self.deo = net_array['deo'] self.inno = net_array['inno'] self.units = {} self.output = {} def normcall(self, input): #Takes single input pattern and returns network output #This have the same functionality as recall but now input and #output are normalized inside the function. #eni = [ai, bi], eno = [ao, bo] - parameters of linear mapping self.input = input #set input units self.setin() #print("FROM FFNET PROP: MAX INPUT: ", np.max(self.input)) #print("INPUT: ", input) #print("UNITS: ", self.units) #propagate signals self.prop() #get output self.getout() return self.output[1] def setin(self): #normalize and set input units for k in range(1,len(self.inno) + 1): self.units[self.inno[k]] = self.eni[k][1] * self.input[k-1] + self.eni[k][2] # because self.input is a python list, and the others were inputted from a fortran-format file #print("K: ", k, " len(self.inno): ", len(self.inno), " self.inno[k]: ", self.inno[k], " self.eni[k][1]: ", self.eni[k][1], " self.eni[k][2]: ", self.eni[k][2]) #print("input[", k-1, "] = ", self.input[k-1]) def prop(self): #Gets conec and units with input already set #and calculates all activations. #Identity input and sigmoid activation function for other units #is assumed #propagate signals with sigmoid activation function if len(self.conec) > 0: ctrg = self.conec[1][2] self.units[ctrg] = 0.0 for xn in range(1,len(self.conec) + 1): src = self.conec[xn][1] trg = self.conec[xn][2] # if next unit if trg != ctrg: self.units[ctrg] = 1.0/(1.0+math.exp(-self.units[ctrg])) ctrg = trg if src == 0: # handle bias self.units[ctrg] = self.weights[xn] else: self.units[ctrg] = self.units[src] * self.weights[xn] else: if src == 0: # handle bias self.units[ctrg] = self.units[ctrg] + self.weights[xn] else: self.units[ctrg] = self.units[ctrg] + self.units[src] * self.weights[xn] self.units[ctrg] = 1.0/(1.0+math.exp(-self.units[ctrg])) # for last unit def getout(self): #get and denormalize output units for k in range(1,len(self.outno)+1): self.output[k] = self.deo[k][1] * self.units[self.outno[k]] + self.deo[k][2] #def dlscore(): # Load the model # with open("model.json", "r") as json_file: # loaded_model = model_from_json(json_file.read()) # Load weights # loaded_model.load_weights("model.h5") # Compile the model # loaded_model.compile( # loss='mean_squared_error', # optimizer=keras.optimizers.Adam(lr=0.001), # metrics=[metrics.MSE]) # return loaded_model def dl_nets(): """ Yields new set of deep learning based networks """ # Read the networks directory = 'dl_networks_03/' with open(directory + 'sorted_models.pickle', 'rb') as f: model_files = pickle.load(f) with open(directory + 'sorted_weights.pickle', 'rb') as f: weight_files = pickle.load(f) assert(len(model_files) == len(weight_files)), \ 'Number of model files and the weight files are not the same.' for model, weight in zip(model_files, weight_files): # Load the network with open(model, 'r') as json_file: loaded_model = model_from_json(json_file.read()) # Load the weights loaded_model.load_weights(weight) # Compile the network #loaded_model.compile( # loss='mean_squared_error', # optimizer=keras.optimizers.Adam(lr=0.001), # metrics=[metrics.mse]) yield loaded_model #def sensoring(test_x, train_y, pred): # """ Sensor the predicted data to get rid of outliers """ # mn = np.min(train_y) # mx = np.max(train_y) # pred = np.minimum(pred, mx) # pred = np.maximum(pred, mn) # return pred class point: x=99999.0 y=99999.0 z=99999.0 def __init__ (self, x, y ,z): self.x = x self.y = y self.z = z def copy_of(self): return point(self.x, self.y, self.z) def dist_to(self,apoint): return math.sqrt(math.pow(self.x - apoint.x,2) + math.pow(self.y - apoint.y,2) + math.pow(self.z - apoint.z,2)) def Magnitude(self): return self.dist_to(point(0,0,0)) def CreatePDBLine(self, index): output = "ATOM " output = output + str(index).rjust(6) + "X".rjust(5) + "XXX".rjust(4) output = output + ("%.3f" % self.x).rjust(18) output = output + ("%.3f" % self.y).rjust(8) output = output + ("%.3f" % self.z).rjust(8) output = output + "X".rjust(24) return output class atom: def __init__ (self): self.atomname = "" self.residue = "" self.coordinates = point(99999, 99999, 99999) self.element = "" self.PDBIndex = "" self.line="" self.atomtype="" self.IndeciesOfAtomsConnecting=[] self.charge = 0 self.resid = 0 self.chain = "" self.structure = "" self.comment = "" def copy_of(self): theatom = atom() theatom.atomname = self.atomname theatom.residue = self.residue theatom.coordinates = self.coordinates.copy_of() theatom.element = self.element theatom.PDBIndex = self.PDBIndex theatom.line= self.line theatom.atomtype= self.atomtype theatom.IndeciesOfAtomsConnecting = self.IndeciesOfAtomsConnecting[:] theatom.charge = self.charge theatom.resid = self.resid theatom.chain = self.chain theatom.structure = self.structure theatom.comment = self.comment return theatom def CreatePDBLine(self, index): output = "ATOM " output = output + str(index).rjust(6) + self.atomname.rjust(5) + self.residue.rjust(4) output = output + ("%.3f" % self.coordinates.x).rjust(18) output = output + ("%.3f" % self.coordinates.y).rjust(8) output = output + ("%.3f" % self.coordinates.z).rjust(8) output = output + self.element.rjust(24) return output def NumberOfNeighbors(self): return len(self.IndeciesOfAtomsConnecting) def AddNeighborAtomIndex(self, index): if not (index in self.IndeciesOfAtomsConnecting): self.IndeciesOfAtomsConnecting.append(index) def SideChainOrBackBone(self): # only really applies to proteins, assuming standard atom names if self.atomname.strip() == "CA" or self.atomname.strip() == "C" or self.atomname.strip() == "O" or self.atomname.strip() == "N": return "BACKBONE" else: return "SIDECHAIN" def ReadPDBLine(self, Line): self.line = Line self.atomname = Line[11:16].strip() if len(self.atomname)==1: self.atomname = self.atomname + " " elif len(self.atomname)==2: self.atomname = self.atomname + " " elif len(self.atomname)==3: self.atomname = self.atomname + " " # This line is necessary for babel to work, though many PDBs in the PDB would have this line commented out self.coordinates = point(float(Line[30:38]), float(Line[38:46]), float(Line[46:54])) # now atom type (for pdbqt) self.atomtype = Line[77:79].strip().upper() if Line[69:76].strip() != "": self.charge = float(Line[69:76]) else: self.charge = 0.0 if self.element == "": # try to guess at element from name two_letters = self.atomname[0:2].strip().upper() if two_letters=='BR': self.element='BR' elif two_letters=='CL': self.element='CL' elif two_letters=='BI': self.element='BI' elif two_letters=='AS': self.element='AS' elif two_letters=='AG': self.element='AG' elif two_letters=='LI': self.element='LI' #elif two_letters=='HG': # self.element='HG' elif two_letters=='MG': self.element='MG' elif two_letters=='MN': self.element='MN' elif two_letters=='RH': self.element='RH' elif two_letters=='ZN': self.element='ZN' elif two_letters=='FE': self.element='FE' else: #So, just assume it's the first letter. # Any number needs to be removed from the element name self.element = self.atomname self.element = self.element.replace('0','') self.element = self.element.replace('1','') self.element = self.element.replace('2','') self.element = self.element.replace('3','') self.element = self.element.replace('4','') self.element = self.element.replace('5','') self.element = self.element.replace('6','') self.element = self.element.replace('7','') self.element = self.element.replace('8','') self.element = self.element.replace('9','') self.element = self.element.replace('@','') self.element = self.element[0:1].strip().upper() self.PDBIndex = Line[6:12].strip() self.residue = Line[16:20] self.residue = " " + self.residue[-3:] # this only uses the rightmost three characters, essentially removing unique rotamer identification if Line[23:26].strip() != "": self.resid = int(Line[23:26]) else: self.resid = 1 self.chain = Line[21:22] if self.residue.strip() == "": self.residue = " MOL" class PDB: def __init__ (self): self.AllAtoms={} self.NonProteinAtoms = {} self.max_x = -9999.99 self.min_x = 9999.99 self.max_y = -9999.99 self.min_y = 9999.99 self.max_z = -9999.99 self.min_z = 9999.99 self.rotateable_bonds_count = 0 self.functions = MathFunctions() self.protein_resnames = ["ALA", "ARG", "ASN", "ASP", "ASH", "ASX", "CYS", "CYM", "CYX", "GLN", "GLU", "GLH", "GLX", "GLY", "HIS", "HID", "HIE", "HIP", "ILE", "LEU", "LYS", "LYN", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"] self.aromatic_rings = [] self.charges = [] # a list of points self.OrigFileName = "" def LoadPDB_from_file(self, FileName, line_header=""): self.line_header=line_header # Now load the file into a list file = open(FileName,"r") lines = file.readlines() file.close() self.LoadPDB_from_list(lines, self.line_header) def LoadPDB_from_list(self, lines, line_header=""): self.line_header=line_header autoindex = 1 self.__init__() atom_already_loaded = [] # going to keep track of atomname_resid_chain pairs, to make sure redundants aren't loaded. This basically # gets rid of rotomers, I think. for t in range(0,len(lines)): line=lines[t] if "between atoms" in line and " A " in line: self.rotateable_bonds_count = self.rotateable_bonds_count + 1 if len(line) >= 7: if line[0:4]=="ATOM" or line[0:6]=="HETATM": # Load atom data (coordinates, etc.) TempAtom = atom() TempAtom.ReadPDBLine(line) key = TempAtom.atomname.strip() + "_" + str(TempAtom.resid) + "_" + TempAtom.residue.strip() + "_" + TempAtom.chain.strip() # this string unique identifies each atom if key in atom_already_loaded and TempAtom.residue.strip() in self.protein_resnames: # so this is a receptor atom that has already been loaded once print(self.line_header + "WARNING: Duplicate receptor atom detected: \"" + TempAtom.line.strip()+ "\". Not loading this duplicate.") #print "" if not key in atom_already_loaded or not TempAtom.residue.strip() in self.protein_resnames: # so either the atom hasn't been loaded, or else it's a non-receptor atom # so note that non-receptor atoms can have redundant names, but receptor atoms cannot. # This is because protein residues often contain rotamers atom_already_loaded.append(key) # so each atom can only be loaded once. No rotamers. self.AllAtoms[autoindex] = TempAtom # So you're actually reindexing everything here. if not TempAtom.residue[-3:] in self.protein_resnames: self.NonProteinAtoms[autoindex] = TempAtom autoindex = autoindex + 1 self.CheckProteinFormat() self.CreateBondsByDistance() # only for the ligand, because bonds can be inferred based on atomnames from PDB self.assign_aromatic_rings() self.assign_charges() def printout(self, thestring): lines = textwrap.wrap(thestring, 80) for line in lines: print(line) def SavePDB(self, filename): f = open(filename, 'w') towrite = self.SavePDBString() if towrite.strip() == "": towrite = "ATOM 1 X XXX 0.000 0.000 0.000 X" # just so no PDB is empty, VMD will load them all f.write(towrite) f.close() def SavePDBString(self): ToOutput = "" # write coordinates for atomindex in self.AllAtoms: ToOutput = ToOutput + self.AllAtoms[atomindex].CreatePDBLine(atomindex) + "\n" return ToOutput def AddNewAtom(self, atom): # first get available index t = 1 while t in self.AllAtoms.keys(): t = t + 1 # now add atom self.AllAtoms[t] = atom def connected_atoms_of_given_element(self, index, connected_atom_element): atom = self.AllAtoms[index] connected_atoms = [] for index2 in atom.IndeciesOfAtomsConnecting: atom2 = self.AllAtoms[index2] if atom2.element == connected_atom_element: connected_atoms.append(index2) return connected_atoms def connected_heavy_atoms(self, index): atom = self.AllAtoms[index] connected_atoms = [] for index2 in atom.IndeciesOfAtomsConnecting: atom2 = self.AllAtoms[index2] if atom2.element != "H": connected_atoms.append(index2) return connected_atoms def CheckProteinFormat(self): curr_res = "" first = True residue = [] for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] key = atom.residue + "_" + str(atom.resid) + "_" + atom.chain if first == True: curr_res = key first = False if key != curr_res: self.CheckProteinFormat_process_residue(residue, last_key) residue = [] curr_res = key residue.append(atom.atomname.strip()) last_key = key self.CheckProteinFormat_process_residue(residue, last_key) def CheckProteinFormat_process_residue(self, residue, last_key): temp = last_key.strip().split("_") resname = temp[0] real_resname = resname[-3:] resid = temp[1] chain = temp[2] if real_resname in self.protein_resnames: # so it's a protein residue if not "N" in residue: self.printout('WARNING: There is no atom named "N" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "C" in residue: self.printout('WARNING: There is no atom named "C" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CA" in residue: self.printout('WARNING: There is no atom named "CA" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine secondary structure. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "GLU" or real_resname == "GLH" or real_resname == "GLX": if not "OE1" in residue: self.printout('WARNING: There is no atom named "OE1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "OE2" in residue: self.printout('WARNING: There is no atom named "OE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "ASP" or real_resname == "ASH" or real_resname == "ASX": if not "OD1" in residue: self.printout('WARNING: There is no atom named "OD1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "OD2" in residue: self.printout('WARNING: There is no atom named "OD2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "LYS" or real_resname == "LYN": if not "NZ" in residue: self.printout('WARNING: There is no atom named "NZ" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "ARG": if not "NH1" in residue: self.printout('WARNING: There is no atom named "NH1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "NH2" in residue: self.printout('WARNING: There is no atom named "NH2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "HIS" or real_resname == "HID" or real_resname == "HIE" or real_resname == "HIP": if not "NE2" in residue: self.printout('WARNING: There is no atom named "NE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "ND1" in residue: self.printout('WARNING: There is no atom named "ND1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-cation and salt-bridge interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "PHE": if not "CG" in residue: self.printout('WARNING: There is no atom named "CG" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD1" in residue: self.printout('WARNING: There is no atom named "CD1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD2" in residue: self.printout('WARNING: There is no atom named "CD2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE1" in residue: self.printout('WARNING: There is no atom named "CE1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE2" in residue: self.printout('WARNING: There is no atom named "CE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CZ" in residue: self.printout('WARNING: There is no atom named "CZ" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "TYR": if not "CG" in residue: self.printout('WARNING: There is no atom named "CG" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD1" in residue: self.printout('WARNING: There is no atom named "CD1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD2" in residue: self.printout('WARNING: There is no atom named "CD2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE1" in residue: self.printout('WARNING: There is no atom named "CE1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE2" in residue: self.printout('WARNING: There is no atom named "CE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CZ" in residue: self.printout('WARNING: There is no atom named "CZ" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "TRP": if not "CG" in residue: self.printout('WARNING: There is no atom named "CG" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD1" in residue: self.printout('WARNING: There is no atom named "CD1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD2" in residue: self.printout('WARNING: There is no atom named "CD2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "NE1" in residue: self.printout('WARNING: There is no atom named "NE1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE2" in residue: self.printout('WARNING: There is no atom named "CE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE3" in residue: self.printout('WARNING: There is no atom named "CE3" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CZ2" in residue: self.printout('WARNING: There is no atom named "CZ2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CZ3" in residue: self.printout('WARNING: There is no atom named "CZ3" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CH2" in residue: self.printout('WARNING: There is no atom named "CH2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if real_resname == "HIS" or real_resname == "HID" or real_resname == "HIE" or real_resname == "HIP": if not "CG" in residue: self.printout('WARNING: There is no atom named "CG" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "ND1" in residue: self.printout('WARNING: There is no atom named "ND1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CD2" in residue: self.printout('WARNING: There is no atom named "CD2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "CE1" in residue: self.printout('WARNING: There is no atom named "CE1" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") if not "NE2" in residue: self.printout('WARNING: There is no atom named "NE2" in the protein residue ' + last_key + '. Please use standard naming conventions for all protein residues. This atom is needed to determine pi-pi and pi-cation interactions. If this residue is far from the active site, this warning may not affect the NNScore.') print("") # Functions to determine the bond connectivity based on distance # ============================================================== def CreateBondsByDistance(self): for AtomIndex1 in self.NonProteinAtoms: atom1 = self.NonProteinAtoms[AtomIndex1] if not atom1.residue[-3:] in self.protein_resnames: # so it's not a protein residue for AtomIndex2 in self.NonProteinAtoms: if AtomIndex1 != AtomIndex2: atom2 = self.NonProteinAtoms[AtomIndex2] if not atom2.residue[-3:] in self.protein_resnames: # so it's not a protein residue dist = self.functions.distance(atom1.coordinates, atom2.coordinates) if (dist < self.BondLength(atom1.element, atom2.element) * 1.2): atom1.AddNeighborAtomIndex(AtomIndex2) atom2.AddNeighborAtomIndex(AtomIndex1) def BondLength(self, element1, element2): '''Bond lengths taken from Handbook of Chemistry and Physics. The information provided there was very specific, so I tried to pick representative examples and used the bond lengths from those. Sitautions could arise where these lengths would be incorrect, probably slight errors (<0.06) in the hundreds.''' distance = 0.0 if element1 == "C" and element2 == "C": distance = 1.53 if element1 == "N" and element2 == "N": distance = 1.425 if element1 == "O" and element2 == "O": distance = 1.469 if element1 == "S" and element2 == "S": distance = 2.048 if (element1 == "C" and element2 == "H") or (element1 == "H" and element2 == "C"): distance = 1.059 if (element1 == "C" and element2 == "N") or (element1 == "N" and element2 == "C"): distance = 1.469 if (element1 == "C" and element2 == "O") or (element1 == "O" and element2 == "C"): distance = 1.413 if (element1 == "C" and element2 == "S") or (element1 == "S" and element2 == "C"): distance = 1.819 if (element1 == "N" and element2 == "H") or (element1 == "H" and element2 == "N"): distance = 1.009 if (element1 == "N" and element2 == "O") or (element1 == "O" and element2 == "N"): distance = 1.463 if (element1 == "O" and element2 == "S") or (element1 == "S" and element2 == "O"): distance = 1.577 if (element1 == "O" and element2 == "H") or (element1 == "H" and element2 == "O"): distance = 0.967 if (element1 == "S" and element2 == "H") or (element1 == "H" and element2 == "S"): distance = 2.025/1.5 # This one not from source sited above. Not sure where it's from, but it wouldn't ever be used in the current context ("AutoGrow") if (element1 == "S" and element2 == "N") or (element1 == "N" and element2 == "S"): distance = 1.633 if (element1 == "C" and element2 == "F") or (element1 == "F" and element2 == "C"): distance = 1.399 if (element1 == "C" and element2 == "CL") or (element1 == "CL" and element2 == "C"): distance = 1.790 if (element1 == "C" and element2 == "BR") or (element1 == "BR" and element2 == "C"): distance = 1.910 if (element1 == "C" and element2 == "I") or (element1 == "I" and element2 == "C"): distance=2.162 if (element1 == "S" and element2 == "BR") or (element1 == "BR" and element2 == "S"): distance = 2.321 if (element1 == "S" and element2 == "CL") or (element1 == "CL" and element2 == "S"): distance = 2.283 if (element1 == "S" and element2 == "F") or (element1 == "F" and element2 == "S"): distance = 1.640 if (element1 == "S" and element2 == "I") or (element1 == "I" and element2 == "S"): distance= 2.687 if (element1 == "P" and element2 == "BR") or (element1 == "BR" and element2 == "P"): distance = 2.366 if (element1 == "P" and element2 == "CL") or (element1 == "CL" and element2 == "P"): distance = 2.008 if (element1 == "P" and element2 == "F") or (element1 == "F" and element2 == "P"): distance = 1.495 if (element1 == "P" and element2 == "I") or (element1 == "I" and element2 == "P"): distance= 2.490 if (element1 == "P" and element2 == "O") or (element1 == "O" and element2 == "P"): distance= 1.6 # estimate based on eye balling Handbook of Chemistry and Physics if (element1 == "N" and element2 == "BR") or (element1 == "BR" and element2 == "N"): distance = 1.843 if (element1 == "N" and element2 == "CL") or (element1 == "CL" and element2 == "N"): distance = 1.743 if (element1 == "N" and element2 == "F") or (element1 == "F" and element2 == "N"): distance = 1.406 if (element1 == "N" and element2 == "I") or (element1 == "I" and element2 == "N"): distance= 2.2 if (element1 == "SI" and element2 == "BR") or (element1 == "BR" and element2 == "SI"): distance = 2.284 if (element1 == "SI" and element2 == "CL") or (element1 == "CL" and element2 == "SI"): distance = 2.072 if (element1 == "SI" and element2 == "F") or (element1 == "F" and element2 == "SI"): distance = 1.636 if (element1 == "SI" and element2 == "P") or (element1 == "P" and element2 == "SI"): distance= 2.264 if (element1 == "SI" and element2 == "S") or (element1 == "S" and element2 == "SI"): distance= 2.145 if (element1 == "SI" and element2 == "SI") or (element1 == "SI" and element2 == "SI"): distance= 2.359 if (element1 == "SI" and element2 == "C") or (element1 == "C" and element2 == "SI"): distance= 1.888 if (element1 == "SI" and element2 == "N") or (element1 == "N" and element2 == "SI"): distance= 1.743 if (element1 == "SI" and element2 == "O") or (element1 == "O" and element2 == "SI"): distance= 1.631 return distance; # Functions to identify positive charges # ====================================== def assign_charges(self): # Get all the quartinary amines on non-protein residues (these are the only non-protein groups that will be identified as positively charged) AllCharged = [] for atom_index in self.NonProteinAtoms: atom = self.NonProteinAtoms[atom_index] if atom.element == "MG" or atom.element == "MN" or atom.element == "RH" or atom.element == "ZN" or atom.element == "FE" or atom.element == "BI" or atom.element == "AS" or atom.element == "AG": chrg = self.charged(atom.coordinates, [atom_index], True) self.charges.append(chrg) if atom.element == "N": if atom.NumberOfNeighbors() == 4: # a quartinary amine, so it's easy indexes = [atom_index] indexes.extend(atom.IndeciesOfAtomsConnecting) chrg = self.charged(atom.coordinates, indexes, True) # so the indicies stored is just the index of the nitrogen and any attached atoms self.charges.append(chrg) elif atom.NumberOfNeighbors() == 3: # maybe you only have two hydrogen's added, by they're sp3 hybridized. Just count this as a quartinary amine, since I think the positive charge would be stabalized. nitrogen = atom atom1 = self.AllAtoms[atom.IndeciesOfAtomsConnecting[0]] atom2 = self.AllAtoms[atom.IndeciesOfAtomsConnecting[1]] atom3 = self.AllAtoms[atom.IndeciesOfAtomsConnecting[2]] angle1 = self.functions.angle_between_three_points(atom1.coordinates, nitrogen.coordinates, atom2.coordinates) * 180.0 / math.pi angle2 = self.functions.angle_between_three_points(atom1.coordinates, nitrogen.coordinates, atom3.coordinates) * 180.0 / math.pi angle3 = self.functions.angle_between_three_points(atom2.coordinates, nitrogen.coordinates, atom3.coordinates) * 180.0 / math.pi average_angle = (angle1 + angle2 + angle3) / 3 if math.fabs(average_angle - 109.0) < 5.0: indexes = [atom_index] indexes.extend(atom.IndeciesOfAtomsConnecting) chrg = self.charged(nitrogen.coordinates, indexes, True) # so indexes added are the nitrogen and any attached atoms. self.charges.append(chrg) if atom.element == "C": # let's check for guanidino-like groups (actually H2N-C-NH2, where not CN3.) if atom.NumberOfNeighbors() == 3: # the carbon has only three atoms connected to it nitrogens = self.connected_atoms_of_given_element(atom_index,"N") if len(nitrogens) >= 2: # so carbon is connected to at least two nitrogens # now we need to count the number of nitrogens that are only connected to one heavy atom (the carbon) nitrogens_to_use = [] all_connected = atom.IndeciesOfAtomsConnecting[:] not_isolated = -1 for atmindex in nitrogens: if len(self.connected_heavy_atoms(atmindex)) == 1: nitrogens_to_use.append(atmindex) all_connected.remove(atmindex) if len(all_connected) > 0: not_isolated = all_connected[0] # get the atom that connects this charged group to the rest of the molecule, ultimately to make sure it's sp3 hybridized if len(nitrogens_to_use) == 2 and not_isolated != -1: # so there are at two nitrogens that are only connected to the carbon (and probably some hydrogens) # now you need to make sure not_isolated atom is sp3 hybridized not_isolated_atom = self.AllAtoms[not_isolated] if (not_isolated_atom.element == "C" and not_isolated_atom.NumberOfNeighbors()==4) or (not_isolated_atom.element == "O" and not_isolated_atom.NumberOfNeighbors()==2) or not_isolated_atom.element == "N" or not_isolated_atom.element == "S" or not_isolated_atom.element == "P": pt = self.AllAtoms[nitrogens_to_use[0]].coordinates.copy_of() pt.x = pt.x + self.AllAtoms[nitrogens_to_use[1]].coordinates.x pt.y = pt.y + self.AllAtoms[nitrogens_to_use[1]].coordinates.y pt.z = pt.z + self.AllAtoms[nitrogens_to_use[1]].coordinates.z pt.x = pt.x / 2.0 pt.y = pt.y / 2.0 pt.z = pt.z / 2.0 indexes = [atom_index] indexes.extend(nitrogens_to_use) indexes.extend(self.connected_atoms_of_given_element(nitrogens_to_use[0],"H")) indexes.extend(self.connected_atoms_of_given_element(nitrogens_to_use[1],"H")) chrg = self.charged(pt, indexes, True) # True because it's positive self.charges.append(chrg) if atom.element == "C": # let's check for a carboxylate if atom.NumberOfNeighbors() == 3: # a carboxylate carbon will have three items connected to it. oxygens = self.connected_atoms_of_given_element(atom_index,"O") if len(oxygens) == 2: # a carboxylate will have two oxygens connected to it. # now, each of the oxygens should be connected to only one heavy atom (so if it's connected to a hydrogen, that's okay) if len(self.connected_heavy_atoms(oxygens[0])) == 1 and len(self.connected_heavy_atoms(oxygens[1])) == 1: # so it's a carboxylate! Add a negative charge. pt = self.AllAtoms[oxygens[0]].coordinates.copy_of() pt.x = pt.x + self.AllAtoms[oxygens[1]].coordinates.x pt.y = pt.y + self.AllAtoms[oxygens[1]].coordinates.y pt.z = pt.z + self.AllAtoms[oxygens[1]].coordinates.z pt.x = pt.x / 2.0 pt.y = pt.y / 2.0 pt.z = pt.z / 2.0 chrg = self.charged(pt, [oxygens[0], atom_index, oxygens[1]], False) self.charges.append(chrg) if atom.element == "P": # let's check for a phosphate or anything where a phosphorus is bound to two oxygens where both oxygens are bound to only one heavy atom (the phosphorus). I think this will get several phosphorus substances. oxygens = self.connected_atoms_of_given_element(atom_index,"O") if len(oxygens) >=2: # the phosphorus is bound to at least two oxygens # now count the number of oxygens that are only bound to the phosphorus count = 0 for oxygen_index in oxygens: if len(self.connected_heavy_atoms(oxygen_index)) == 1: count = count + 1 if count >=2: # so there are at least two oxygens that are only bound to the phosphorus indexes = [atom_index] indexes.extend(oxygens) chrg = self.charged(atom.coordinates, indexes, False) self.charges.append(chrg) if atom.element == "S": # let's check for a sulfonate or anything where a sulfur is bound to at least three oxygens and at least three are bound to only the sulfur (or the sulfur and a hydrogen). oxygens = self.connected_atoms_of_given_element(atom_index,"O") if len(oxygens) >=3: # the sulfur is bound to at least three oxygens # now count the number of oxygens that are only bound to the sulfur count = 0 for oxygen_index in oxygens: if len(self.connected_heavy_atoms(oxygen_index)) == 1: count = count + 1 if count >=3: # so there are at least three oxygens that are only bound to the sulfur indexes = [atom_index] indexes.extend(oxygens) chrg = self.charged(atom.coordinates, indexes, False) self.charges.append(chrg) # Now that you've found all the positive charges in non-protein residues, it's time to look for aromatic rings in protein residues curr_res = "" first = True residue = [] for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] key = atom.residue + "_" + str(atom.resid) + "_" + atom.chain if first == True: curr_res = key first = False if key != curr_res: self.assign_charged_from_protein_process_residue(residue, last_key) residue = [] curr_res = key residue.append(atom_index) last_key = key self.assign_charged_from_protein_process_residue(residue, last_key) def assign_charged_from_protein_process_residue(self, residue, last_key): temp = last_key.strip().split("_") resname = temp[0] real_resname = resname[-3:] resid = temp[1] chain = temp[2] if real_resname == "LYS" or real_resname == "LYN": # regardless of protonation state, assume it's charged. for index in residue: atom = self.AllAtoms[index] if atom.atomname.strip() == "NZ": # quickly go through the residue and get the hydrogens attached to this nitrogen to include in the index list indexes = [index] for index2 in residue: atom2 = self.AllAtoms[index2] if atom2.atomname.strip() == "HZ1": indexes.append(index2) if atom2.atomname.strip() == "HZ2": indexes.append(index2) if atom2.atomname.strip() == "HZ3": indexes.append(index2) chrg = self.charged(atom.coordinates, indexes, True) self.charges.append(chrg) break if real_resname == "ARG": charge_pt = point(0.0,0.0,0.0) count = 0.0 indices = [] for index in residue: atom = self.AllAtoms[index] if atom.atomname.strip() == "NH1": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "NH2": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "2HH2": indices.append(index) if atom.atomname.strip() == "1HH2": indices.append(index) if atom.atomname.strip() == "CZ": indices.append(index) if atom.atomname.strip() == "2HH1": indices.append(index) if atom.atomname.strip() == "1HH1": indices.append(index) if count != 0.0: charge_pt.x = charge_pt.x / count charge_pt.y = charge_pt.y / count charge_pt.z = charge_pt.z / count if charge_pt.x != 0.0 or charge_pt.y != 0.0 or charge_pt.z != 0.0: chrg = self.charged(charge_pt, indices, True) self.charges.append(chrg) if real_resname == "HIS" or real_resname == "HID" or real_resname == "HIE" or real_resname == "HIP": # regardless of protonation state, assume it's charged. This based on "The Cation-Pi Interaction," which suggests protonated state would be stabalized. But let's not consider HIS when doing salt bridges. charge_pt = point(0.0,0.0,0.0) count = 0.0 indices = [] for index in residue: atom = self.AllAtoms[index] if atom.atomname.strip() == "NE2": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "ND1": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "HE2": indices.append(index) if atom.atomname.strip() == "HD1": indices.append(index) if atom.atomname.strip() == "CE1": indices.append(index) if atom.atomname.strip() == "CD2": indices.append(index) if atom.atomname.strip() == "CG": indices.append(index) if count != 0.0: charge_pt.x = charge_pt.x / count charge_pt.y = charge_pt.y / count charge_pt.z = charge_pt.z / count if charge_pt.x != 0.0 or charge_pt.y != 0.0 or charge_pt.z != 0.0: chrg = self.charged(charge_pt, indices, True) self.charges.append(chrg) if real_resname == "GLU" or real_resname == "GLH" or real_resname == "GLX": # regardless of protonation state, assume it's charged. This based on "The Cation-Pi Interaction," which suggests protonated state would be stabalized. charge_pt = point(0.0,0.0,0.0) count = 0.0 indices = [] for index in residue: atom = self.AllAtoms[index] if atom.atomname.strip() == "OE1": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "OE2": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "CD": indices.append(index) if count != 0.0: charge_pt.x = charge_pt.x / count charge_pt.y = charge_pt.y / count charge_pt.z = charge_pt.z / count if charge_pt.x != 0.0 or charge_pt.y != 0.0 or charge_pt.z != 0.0: chrg = self.charged(charge_pt, indices, False) # False because it's a negative charge self.charges.append(chrg) if real_resname == "ASP" or real_resname == "ASH" or real_resname == "ASX": # regardless of protonation state, assume it's charged. This based on "The Cation-Pi Interaction," which suggests protonated state would be stabalized. charge_pt = point(0.0,0.0,0.0) count = 0.0 indices = [] for index in residue: atom = self.AllAtoms[index] if atom.atomname.strip() == "OD1": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "OD2": charge_pt.x = charge_pt.x + atom.coordinates.x charge_pt.y = charge_pt.y + atom.coordinates.y charge_pt.z = charge_pt.z + atom.coordinates.z indices.append(index) count = count + 1.0 if atom.atomname.strip() == "CG": indices.append(index) if count != 0.0: charge_pt.x = charge_pt.x / count charge_pt.y = charge_pt.y / count charge_pt.z = charge_pt.z / count if charge_pt.x != 0.0 or charge_pt.y != 0.0 or charge_pt.z != 0.0: chrg = self.charged(charge_pt, indices, False) # False because it's a negative charge self.charges.append(chrg) class charged(): def __init__(self, coordinates, indices, positive): self.coordinates = coordinates self.indices = indices self.positive = positive # true or false to specifiy if positive or negative charge # Functions to identify aromatic rings # ==================================== def add_aromatic_marker(self, indicies_of_ring): # first identify the center point points_list = [] total = len(indicies_of_ring) x_sum = 0.0 y_sum = 0.0 z_sum = 0.0 for index in indicies_of_ring: atom = self.AllAtoms[index] points_list.append(atom.coordinates) x_sum = x_sum + atom.coordinates.x y_sum = y_sum + atom.coordinates.y z_sum = z_sum + atom.coordinates.z if total == 0: return # to prevent errors in some cases center = point(x_sum / total, y_sum / total, z_sum / total) # now get the radius of the aromatic ring radius = 0.0 for index in indicies_of_ring: atom = self.AllAtoms[index] dist = center.dist_to(atom.coordinates) if dist > radius: radius = dist # now get the plane that defines this ring if len(indicies_of_ring) < 3: return # to prevent an error in some cases. If there aren't three point, you can't define a plane elif len(indicies_of_ring) == 3: A = self.AllAtoms[indicies_of_ring[0]].coordinates B = self.AllAtoms[indicies_of_ring[1]].coordinates C = self.AllAtoms[indicies_of_ring[2]].coordinates elif len(indicies_of_ring) == 4: A = self.AllAtoms[indicies_of_ring[0]].coordinates B = self.AllAtoms[indicies_of_ring[1]].coordinates C = self.AllAtoms[indicies_of_ring[3]].coordinates else: # best, for 5 and 6 member rings A = self.AllAtoms[indicies_of_ring[0]].coordinates B = self.AllAtoms[indicies_of_ring[2]].coordinates C = self.AllAtoms[indicies_of_ring[4]].coordinates AB = self.functions.vector_subtraction(B,A) AC = self.functions.vector_subtraction(C,A) ABXAC = self.functions.CrossProduct(AB,AC) # formula for plane will be ax + by + cz = d x1 = self.AllAtoms[indicies_of_ring[0]].coordinates.x y1 = self.AllAtoms[indicies_of_ring[0]].coordinates.y z1 = self.AllAtoms[indicies_of_ring[0]].coordinates.z a = ABXAC.x b = ABXAC.y c = ABXAC.z d = a*x1 + b*y1 + c*z1 ar_ring = self.aromatic_ring(center, indicies_of_ring, [a,b,c,d], radius) self.aromatic_rings.append(ar_ring) class aromatic_ring(): def __init__(self, center, indices, plane_coeff, radius): self.center = center self.indices = indices self.plane_coeff = plane_coeff # a*x + b*y + c*z = dI think that self.radius = radius def assign_aromatic_rings(self): # Get all the rings containing each of the atoms in the ligand AllRings = [] for atom_index in self.NonProteinAtoms: AllRings.extend(self.all_rings_containing_atom(atom_index)) for ring_index_1 in range(len(AllRings)): ring1 = AllRings[ring_index_1] if len(ring1) != 0: for ring_index_2 in range(len(AllRings)): if ring_index_1 != ring_index_2: ring2 = AllRings[ring_index_2] if len(ring2) != 0: if self.set1_is_subset_of_set2(ring1, ring2) == True: AllRings[ring_index_2] = [] while [] in AllRings: AllRings.remove([]) # Now we need to figure out which of these ligands are aromatic (planar) for ring_index in range(len(AllRings)): ring = AllRings[ring_index] is_flat = True for t in range(-3, len(ring)-3): pt1 = self.NonProteinAtoms[ring[t]].coordinates pt2 = self.NonProteinAtoms[ring[t+1]].coordinates pt3 = self.NonProteinAtoms[ring[t+2]].coordinates pt4 = self.NonProteinAtoms[ring[t+3]].coordinates # first, let's see if the last atom in this ring is a carbon connected to four atoms. That would be a quick way of telling this is not an aromatic ring cur_atom = self.NonProteinAtoms[ring[t+3]] if cur_atom.element == "C" and cur_atom.NumberOfNeighbors() == 4: is_flat = False break # now check the dihedral between the ring atoms to see if it's flat angle = self.functions.dihedral(pt1, pt2, pt3, pt4) * 180 / math.pi if (angle > -165 and angle < -15) or (angle > 15 and angle < 165): # 15 degress is the cutoff #, ring[t], ring[t+1], ring[t+2], ring[t+3] # range of this function is -pi to pi is_flat = False break # now check the dihedral between the ring atoms and an atom connected to the current atom to see if that's flat too. for substituent_atom_index in cur_atom.IndeciesOfAtomsConnecting: pt_sub = self.NonProteinAtoms[substituent_atom_index].coordinates angle = self.functions.dihedral(pt2, pt3, pt4, pt_sub) * 180 / math.pi if (angle > -165 and angle < -15) or (angle > 15 and angle < 165): # 15 degress is the cutoff #, ring[t], ring[t+1], ring[t+2], ring[t+3] # range of this function is -pi to pi is_flat = False break if is_flat == False: AllRings[ring_index] = [] if len(ring) < 5: AllRings[ring_index] = [] # While I'm at it, three and four member rings are not aromatic if len(ring) > 6: AllRings[ring_index] = [] # While I'm at it, if the ring has more than 6, also throw it out. So only 5 and 6 member rings are allowed. while [] in AllRings: AllRings.remove([]) for ring in AllRings: self.add_aromatic_marker(ring) # Now that you've found all the rings in non-protein residues, it's time to look for aromatic rings in protein residues curr_res = "" first = True residue = [] for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] key = atom.residue + "_" + str(atom.resid) + "_" + atom.chain if first == True: curr_res = key first = False if key != curr_res: self.assign_aromatic_rings_from_protein_process_residue(residue, last_key) residue = [] curr_res = key residue.append(atom_index) last_key = key self.assign_aromatic_rings_from_protein_process_residue(residue, last_key) def assign_aromatic_rings_from_protein_process_residue(self, residue, last_key): temp = last_key.strip().split("_") resname = temp[0] real_resname = resname[-3:] resid = temp[1] chain = temp[2] if real_resname == "PHE": indicies_of_ring = [] for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CG": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CZ": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD2": indicies_of_ring.append(index) self.add_aromatic_marker(indicies_of_ring) if real_resname == "TYR": indicies_of_ring = [] for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CG": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CZ": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD2": indicies_of_ring.append(index) self.add_aromatic_marker(indicies_of_ring) if real_resname == "HIS" or real_resname == "HID" or real_resname == "HIE" or real_resname == "HIP": indicies_of_ring = [] for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CG": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "ND1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "NE2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD2": indicies_of_ring.append(index) self.add_aromatic_marker(indicies_of_ring) if real_resname == "TRP": indicies_of_ring = [] for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CG": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "NE1": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD2": indicies_of_ring.append(index) self.add_aromatic_marker(indicies_of_ring) indicies_of_ring = [] for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CD2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CE3": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CZ3": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CH2": indicies_of_ring.append(index) for index in residue: # written this way because order is important atom = self.AllAtoms[index] if atom.atomname.strip() == "CZ2": indicies_of_ring.append(index) self.add_aromatic_marker(indicies_of_ring) def set1_is_subset_of_set2(self, set1, set2): is_subset = True for item in set1: if not item in set2: is_subset = False break return is_subset def all_rings_containing_atom(self, index): AllRings = [] atom = self.AllAtoms[index] for conneceted_atom in atom.IndeciesOfAtomsConnecting: self.ring_recursive(conneceted_atom, [index], index, AllRings) return AllRings def ring_recursive(self, index, AlreadyCrossed, orig_atom, AllRings): if len(AlreadyCrossed) > 6: return # since you're only considering aromatic rings containing 5 or 6 members anyway, save yourself some time. atom = self.AllAtoms[index] temp = AlreadyCrossed[:] temp.append(index) for conneceted_atom in atom.IndeciesOfAtomsConnecting: if not conneceted_atom in AlreadyCrossed: self.ring_recursive(conneceted_atom, temp, orig_atom, AllRings) if conneceted_atom == orig_atom and orig_atom != AlreadyCrossed[-1]: AllRings.append(temp) # Functions to assign secondary structure to protein residues # =========================================================== def assign_secondary_structure(self): # first, we need to know what resid's are available resids = [] last_key = "-99999_Z" for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] key = str(atom.resid) + "_" + atom.chain if key != last_key: last_key = key resids.append(last_key) structure = {} for resid in resids: structure[resid] = "OTHER" atoms = [] for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] if atom.SideChainOrBackBone() == "BACKBONE": if len(atoms) < 8: atoms.append(atom) else: atoms.pop(0) atoms.append(atom) # now make sure the first four all have the same resid and the last four all have the same resid if atoms[0].resid == atoms[1].resid and atoms[0].resid == atoms[2].resid and atoms[0].resid == atoms[3].resid and atoms[0] != atoms[4].resid and atoms[4].resid == atoms[5].resid and atoms[4].resid == atoms[6].resid and atoms[4].resid == atoms[7].resid and atoms[0].resid + 1 == atoms[7].resid and atoms[0].chain == atoms[7].chain: resid1 = atoms[0].resid resid2 = atoms[7].resid # Now give easier to use names to the atoms for atom in atoms: if atom.resid == resid1 and atom.atomname.strip() == "N": first_N = atom if atom.resid == resid1 and atom.atomname.strip() == "C": first_C = atom if atom.resid == resid1 and atom.atomname.strip() == "CA": first_CA = atom if atom.resid == resid2 and atom.atomname.strip() == "N": second_N = atom if atom.resid == resid2 and atom.atomname.strip() == "C": second_C = atom if atom.resid == resid2 and atom.atomname.strip() == "CA": second_CA = atom # Now compute the phi and psi dihedral angles phi = self.functions.dihedral(first_C.coordinates, second_N.coordinates, second_CA.coordinates, second_C.coordinates) * 180.0 / math.pi psi = self.functions.dihedral(first_N.coordinates, first_CA.coordinates, first_C.coordinates, second_N.coordinates) * 180.0 / math.pi # Now use those angles to determine if it's alpha or beta if phi > -145 and phi < -35 and psi > -70 and psi < 50: key1 = str(first_C.resid) + "_" + first_C.chain key2 = str(second_C.resid) + "_" + second_C.chain structure[key1] = "ALPHA" structure[key2] = "ALPHA" if (phi >= -180 and phi < -40 and psi <= 180 and psi > 90) or (phi >= -180 and phi < -70 and psi <= -165): # beta. This gets some loops (by my eye), but it's the best I could do. key1 = str(first_C.resid) + "_" + first_C.chain key2 = str(second_C.resid) + "_" + second_C.chain structure[key1] = "BETA" structure[key2] = "BETA" # Now update each of the atoms with this structural information for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] key = str(atom.resid) + "_" + atom.chain atom.structure = structure[key] # Some more post processing. CA_list = [] # first build a list of the indices of all the alpha carbons for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] if atom.residue.strip() in self.protein_resnames and atom.atomname.strip() == "CA": CA_list.append(atom_index) # some more post processing. change = True while change == True: change = False # A residue of index i is only going to be in an alpha helix its CA is within 6 A of the CA of the residue i + 3 for CA_atom_index in CA_list: CA_atom = self.AllAtoms[CA_atom_index] if CA_atom.structure == "ALPHA": # so it's in an alpha helix another_alpha_is_close = False for other_CA_atom_index in CA_list: # so now compare that CA to all the other CA's other_CA_atom = self.AllAtoms[other_CA_atom_index] if other_CA_atom.structure == "ALPHA": # so it's also in an alpha helix if other_CA_atom.resid - 3 == CA_atom.resid or other_CA_atom.resid + 3 == CA_atom.resid: # so this CA atom is one of the ones the first atom might hydrogen bond with if other_CA_atom.coordinates.dist_to(CA_atom.coordinates) < 6.0: # so these two CA atoms are close enough together that their residues are probably hydrogen bonded another_alpha_is_close = True break if another_alpha_is_close == False: self.set_structure_of_residue(CA_atom.chain, CA_atom.resid, "OTHER") change = True # Alpha helices are only alpha helices if they span at least 4 residues (to wrap around and hydrogen bond). I'm going to require them to span at least 5 residues, based on examination of many structures. for index_in_list in range(len(CA_list)-5): index_in_pdb1 = CA_list[index_in_list] index_in_pdb2 = CA_list[index_in_list+1] index_in_pdb3 = CA_list[index_in_list+2] index_in_pdb4 = CA_list[index_in_list+3] index_in_pdb5 = CA_list[index_in_list+4] index_in_pdb6 = CA_list[index_in_list+5] atom1 = self.AllAtoms[index_in_pdb1] atom2 = self.AllAtoms[index_in_pdb2] atom3 = self.AllAtoms[index_in_pdb3] atom4 = self.AllAtoms[index_in_pdb4] atom5 = self.AllAtoms[index_in_pdb5] atom6 = self.AllAtoms[index_in_pdb6] if atom1.resid + 1 == atom2.resid and atom2.resid + 1 == atom3.resid and atom3.resid + 1 == atom4.resid and atom4.resid + 1 == atom5.resid and atom5.resid + 1 == atom6.resid: # so they are sequential if atom1.structure != "ALPHA" and atom2.structure == "ALPHA" and atom3.structure != "ALPHA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") change = True if atom2.structure != "ALPHA" and atom3.structure == "ALPHA" and atom4.structure != "ALPHA": self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") change = True if atom3.structure != "ALPHA" and atom4.structure == "ALPHA" and atom5.structure != "ALPHA": self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") change = True if atom4.structure != "ALPHA" and atom5.structure == "ALPHA" and atom6.structure != "ALPHA": self.set_structure_of_residue(atom5.chain, atom5.resid, "OTHER") change = True if atom1.structure != "ALPHA" and atom2.structure == "ALPHA" and atom3.structure == "ALPHA" and atom4.structure != "ALPHA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") change = True if atom2.structure != "ALPHA" and atom3.structure == "ALPHA" and atom4.structure == "ALPHA" and atom5.structure != "ALPHA": self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") change = True if atom3.structure != "ALPHA" and atom4.structure == "ALPHA" and atom5.structure == "ALPHA" and atom6.structure != "ALPHA": self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") self.set_structure_of_residue(atom5.chain, atom5.resid, "OTHER") change = True if atom1.structure != "ALPHA" and atom2.structure == "ALPHA" and atom3.structure == "ALPHA" and atom4.structure == "ALPHA" and atom5.structure != "ALPHA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") change = True if atom2.structure != "ALPHA" and atom3.structure == "ALPHA" and atom4.structure == "ALPHA" and atom5.structure == "ALPHA" and atom6.structure != "ALPHA": self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") self.set_structure_of_residue(atom5.chain, atom5.resid, "OTHER") change = True if atom1.structure != "ALPHA" and atom2.structure == "ALPHA" and atom3.structure == "ALPHA" and atom4.structure == "ALPHA" and atom5.structure == "ALPHA" and atom6.structure != "ALPHA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") self.set_structure_of_residue(atom4.chain, atom4.resid, "OTHER") self.set_structure_of_residue(atom5.chain, atom5.resid, "OTHER") change = True # now go through each of the BETA CA atoms. A residue is only going to be called a beta sheet if CA atom is within 6.0 A of another CA beta, same chain, but index difference > 2. for CA_atom_index in CA_list: CA_atom = self.AllAtoms[CA_atom_index] if CA_atom.structure == "BETA": # so it's in a beta sheet another_beta_is_close = False for other_CA_atom_index in CA_list: if other_CA_atom_index != CA_atom_index: # so not comparing an atom to itself other_CA_atom = self.AllAtoms[other_CA_atom_index] if other_CA_atom.structure == "BETA": # so you're comparing it only to other BETA-sheet atoms if other_CA_atom.chain == CA_atom.chain: # so require them to be on the same chain. needed to indecies can be fairly compared if math.fabs(other_CA_atom.resid - CA_atom.resid) > 2: # so the two residues are not simply adjacent to each other on the chain if CA_atom.coordinates.dist_to(other_CA_atom.coordinates) < 6.0: # so these to atoms are close to each other another_beta_is_close = True break if another_beta_is_close == False: self.set_structure_of_residue(CA_atom.chain, CA_atom.resid, "OTHER") change = True # Now some more post-processing needs to be done. Do this again to clear up mess that may have just been created (single residue beta strand, for example) # Beta sheets are usually at least 3 residues long for index_in_list in range(len(CA_list)-3): index_in_pdb1 = CA_list[index_in_list] index_in_pdb2 = CA_list[index_in_list+1] index_in_pdb3 = CA_list[index_in_list+2] index_in_pdb4 = CA_list[index_in_list+3] atom1 = self.AllAtoms[index_in_pdb1] atom2 = self.AllAtoms[index_in_pdb2] atom3 = self.AllAtoms[index_in_pdb3] atom4 = self.AllAtoms[index_in_pdb4] if atom1.resid + 1 == atom2.resid and atom2.resid + 1 == atom3.resid and atom3.resid + 1 == atom4.resid: # so they are sequential if atom1.structure != "BETA" and atom2.structure == "BETA" and atom3.structure != "BETA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") change = True if atom2.structure != "BETA" and atom3.structure == "BETA" and atom4.structure != "BETA": self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") change = True if atom1.structure != "BETA" and atom2.structure == "BETA" and atom3.structure == "BETA" and atom4.structure != "BETA": self.set_structure_of_residue(atom2.chain, atom2.resid, "OTHER") self.set_structure_of_residue(atom3.chain, atom3.resid, "OTHER") change = True def set_structure_of_residue(self, chain, resid, structure): for atom_index in self.AllAtoms: atom = self.AllAtoms[atom_index] if atom.chain == chain and atom.resid == resid: atom.structure = structure class MathFunctions: def vector_subtraction(self, vector1, vector2): # vector1 - vector2 return point(vector1.x - vector2.x, vector1.y - vector2.y, vector1.z - vector2.z) def CrossProduct(self, Pt1, Pt2): # never tested Response = point(0,0,0) Response.x = Pt1.y * Pt2.z - Pt1.z * Pt2.y Response.y = Pt1.z * Pt2.x - Pt1.x * Pt2.z Response.z = Pt1.x * Pt2.y - Pt1.y * Pt2.x return Response; def vector_scalar_multiply(self, vector, scalar): return point(vector.x * scalar, vector.y * scalar, vector.z * scalar) def dot_product(self, point1, point2): return point1.x * point2.x + point1.y * point2.y + point1.z * point2.z def dihedral(self, point1, point2, point3, point4): # never tested b1 = self.vector_subtraction(point2, point1) b2 = self.vector_subtraction(point3, point2) b3 = self.vector_subtraction(point4, point3) b2Xb3 = self.CrossProduct(b2,b3) b1Xb2 = self.CrossProduct(b1,b2) b1XMagb2 = self.vector_scalar_multiply(b1,b2.Magnitude()) radians = math.atan2(self.dot_product(b1XMagb2,b2Xb3), self.dot_product(b1Xb2,b2Xb3)) return radians def angle_between_three_points(self, point1, point2, point3): # As in three connected atoms vector1 = self.vector_subtraction(point1, point2) vector2 = self.vector_subtraction(point3, point2) return self.angle_between_points(vector1, vector2) def angle_between_points(self, point1, point2): new_point1 = self.return_normalized_vector(point1) new_point2 = self.return_normalized_vector(point2) dot_prod = self.dot_product(new_point1, new_point2) if dot_prod > 1.0: dot_prod = 1.0 # to prevent errors that can rarely occur if dot_prod < -1.0: dot_prod = -1.0 return math.acos(dot_prod) def return_normalized_vector(self, vector): dist = self.distance(point(0,0,0), vector) return point(vector.x/dist, vector.y/dist, vector.z/dist) def distance(self, point1, point2): deltax = point1.x - point2.x deltay = point1.y - point2.y deltaz = point1.z - point2.z return math.sqrt(math.pow(deltax,2) + math.pow(deltay,2) + math.pow(deltaz,2)) def project_point_onto_plane(self, apoint, plane_coefficients): # essentially finds the point on the plane that is closest to the specified point # the plane_coefficients are [a,b,c,d], where the plane is ax + by + cz = d # First, define a plane using cooeficients a, b, c, d such that ax + by + cz = d a = plane_coefficients[0] b = plane_coefficients[1] c = plane_coefficients[2] d = plane_coefficients[3] # Now, define a point in space (s,u,v) s = apoint.x u = apoint.y v = apoint.z # the formula of a line perpendicular to the plan passing through (s,u,v) is: #x = s + at #y = u + bt #z = v + ct t = (d - a*s - b*u - c*v) / (a*a + b*b + c*c) # here's the point closest on the plane x = s + a*t y = u + b*t z = v + c*t return point(x,y,z) def getCommandOutput2(command): child = os.popen(command) data = child.read() err = child.close() if err: raise RuntimeError('%s failed w/ exit code %d' % (command, err)) return data class binana: functions = MathFunctions() # supporting functions def list_alphebetize_and_combine(self, list): list.sort() return '_'.join(list) def hashtable_entry_add_one(self, hashtable, key, toadd = 1): # note that dictionaries (hashtables) are passed by reference in python if key in hashtable: hashtable[key] = hashtable[key] + toadd else: hashtable[key] = toadd def extend_list_by_dictionary(self, list1, dictionary): # first, sort the dictionary by the key keys = list(dictionary.keys()) keys.sort() # now make a list of the values vals = [] for key in keys: vals.append( dictionary[key]) # now append vals to the list newlist = list1[:] newlist.extend(vals) # return the extended list return newlist def center(self, string, length): while len(string) < length: string = " " + string if len(string) < length: string = string + " " return string # The meat of the class #def __init__(self, ligand_pdbqt_filename, receptor_pdbqt_filename, parameters, line_header, actual_filename_if_ligand_is_list="", actual_filename_if_receptor_is_list=""): # must be a more elegant way of doing this def __init__(self, ligand_pdbqt_filename, receptor, parameters, line_header, actual_filename_if_ligand_is_list="", actual_filename_if_receptor_is_list=""): # must be a more elegant way of doing this receptor_pdbqt_filename = receptor.OrigFileName ligand = PDB() if actual_filename_if_ligand_is_list!="": # so a list was passed as the ligand ligand.LoadPDB_from_list(ligand_pdbqt_filename, line_header) # now write the file so when VINA is run it has a ligand file for input f = open(actual_filename_if_ligand_is_list,'w') for line in ligand_pdbqt_filename: if not "MODEL" in line and not "ENDMDL" in line: f.write(line) f.close() ligand_pdbqt_filename = actual_filename_if_ligand_is_list else: # so a filename was passed as the ligand ligand.LoadPDB_from_file(ligand_pdbqt_filename, line_header) #receptor = PDB() #if actual_filename_if_ligand_is_list=="": # receptor is a filename instead of a list # receptor.LoadPDB_from_file(receptor_pdbqt_filename, line_header) #else: # so it's a list that was passed, not a filename # receptor.LoadPDB_from_list(receptor_pdbqt_filename, line_header) # receptor_pdbqt_filename = actual_filename_if_receptor_is_list receptor.assign_secondary_structure() # Get distance measurements between protein and ligand atom types, as well as some other measurements ligand_receptor_atom_type_pairs_less_than_two_half = {} ligand_receptor_atom_type_pairs_less_than_four = {} ligand_receptor_atom_type_pairs_electrostatic = {} active_site_flexibility = {} hbonds = {} hydrophobics = {} ligand.rotateable_bonds_count functions = MathFunctions() pdb_close_contacts = PDB() pdb_contacts = PDB() pdb_contacts_alpha_helix = PDB() pdb_contacts_beta_sheet = PDB() pdb_contacts_other_2nd_structure = PDB() pdb_side_chain = PDB() pdb_back_bone = PDB() pdb_hydrophobic = PDB() pdb_hbonds = PDB() for ligand_atom_index in ligand.AllAtoms: for receptor_atom_index in receptor.AllAtoms: ligand_atom = ligand.AllAtoms[ligand_atom_index] receptor_atom = receptor.AllAtoms[receptor_atom_index] dist = ligand_atom.coordinates.dist_to(receptor_atom.coordinates) if dist < 2.5: # less than 2.5 A list = [ligand_atom.atomtype, receptor_atom.atomtype] self.hashtable_entry_add_one(ligand_receptor_atom_type_pairs_less_than_two_half, self.list_alphebetize_and_combine(list)) pdb_close_contacts.AddNewAtom(ligand_atom.copy_of()) pdb_close_contacts.AddNewAtom(receptor_atom.copy_of()) elif dist < 4.0: # less than 4 A list = [ligand_atom.atomtype, receptor_atom.atomtype] self.hashtable_entry_add_one(ligand_receptor_atom_type_pairs_less_than_four, self.list_alphebetize_and_combine(list)) pdb_contacts.AddNewAtom(ligand_atom.copy_of()) pdb_contacts.AddNewAtom(receptor_atom.copy_of()) if dist < 4.0: # calculate electrostatic energies for all less than 4 A ligand_charge = ligand_atom.charge receptor_charge = receptor_atom.charge coulomb_energy = (ligand_charge * receptor_charge / dist) * 138.94238460104697e4 # to convert into J/mol # might be nice to double check this list = [ligand_atom.atomtype, receptor_atom.atomtype] self.hashtable_entry_add_one(ligand_receptor_atom_type_pairs_electrostatic, self.list_alphebetize_and_combine(list), coulomb_energy) if dist < 4.0: # Now get statistics to judge active-site flexibility flexibility_key = receptor_atom.SideChainOrBackBone() + "_" + receptor_atom.structure # first can be sidechain or backbone, second back be alpha, beta, or other, so six catagories if receptor_atom.structure == "ALPHA": pdb_contacts_alpha_helix.AddNewAtom(receptor_atom.copy_of()) elif receptor_atom.structure == "BETA": pdb_contacts_beta_sheet.AddNewAtom(receptor_atom.copy_of()) elif receptor_atom.structure == "OTHER": pdb_contacts_other_2nd_structure.AddNewAtom(receptor_atom.copy_of()) if receptor_atom.SideChainOrBackBone() == "BACKBONE": pdb_back_bone.AddNewAtom(receptor_atom.copy_of()) elif receptor_atom.SideChainOrBackBone() == "SIDECHAIN": pdb_side_chain.AddNewAtom(receptor_atom.copy_of()) self.hashtable_entry_add_one(active_site_flexibility, flexibility_key) if dist < 4.0: # Now see if there's hydrophobic contacts (C-C contacts) if ligand_atom.element == "C" and receptor_atom.element == "C": hydrophobic_key = receptor_atom.SideChainOrBackBone() + "_" + receptor_atom.structure pdb_hydrophobic.AddNewAtom(ligand_atom.copy_of()) pdb_hydrophobic.AddNewAtom(receptor_atom.copy_of()) self.hashtable_entry_add_one(hydrophobics, hydrophobic_key) if dist < 4.0: # Now see if there's some sort of hydrogen bond between these two atoms. distance cutoff = 4, angle cutoff = 40. Note that this is liberal. if (ligand_atom.element == "O" or ligand_atom.element == "N") and (receptor_atom.element == "O" or receptor_atom.element == "N"): # now build a list of all the hydrogens close to these atoms hydrogens = [] for atm_index in ligand.AllAtoms: if ligand.AllAtoms[atm_index].element == "H": # so it's a hydrogen if ligand.AllAtoms[atm_index].coordinates.dist_to(ligand_atom.coordinates) < 1.3: # O-H distance is 0.96 A, N-H is 1.01 A. See http://www.science.uwaterloo.ca/~cchieh/cact/c120/bondel.html ligand.AllAtoms[atm_index].comment = "LIGAND" hydrogens.append(ligand.AllAtoms[atm_index]) for atm_index in receptor.AllAtoms: if receptor.AllAtoms[atm_index].element == "H": # so it's a hydrogen if receptor.AllAtoms[atm_index].coordinates.dist_to(receptor_atom.coordinates) < 1.3: # O-H distance is 0.96 A, N-H is 1.01 A. See http://www.science.uwaterloo.ca/~cchieh/cact/c120/bondel.html receptor.AllAtoms[atm_index].comment = "RECEPTOR" hydrogens.append(receptor.AllAtoms[atm_index]) # now we need to check the angles for hydrogen in hydrogens: if math.fabs(180 - functions.angle_between_three_points(ligand_atom.coordinates, hydrogen.coordinates, receptor_atom.coordinates) * 180.0 / math.pi) <= 40.0: hbonds_key = "HDONOR_" + hydrogen.comment + "_" + receptor_atom.SideChainOrBackBone() + "_" + receptor_atom.structure pdb_hbonds.AddNewAtom(ligand_atom.copy_of()) pdb_hbonds.AddNewAtom(hydrogen.copy_of()) pdb_hbonds.AddNewAtom(receptor_atom.copy_of()) self.hashtable_entry_add_one(hbonds, hbonds_key) # Get the total number of each atom type in the ligand ligand_atom_types = {} for ligand_atom_index in ligand.AllAtoms: ligand_atom = ligand.AllAtoms[ligand_atom_index] self.hashtable_entry_add_one(ligand_atom_types, ligand_atom.atomtype) pi_padding = 0.75 # This is perhaps controversial. I noticed that often a pi-cation interaction or other pi interaction was only slightly off, but looking at the structure, it was clearly supposed to be a # pi-cation interaction. I've decided then to artificially expand the radius of each pi ring. Think of this as adding in a VDW radius, or accounting for poor crystal-structure resolution, or whatever you want # to justify it. # Count pi-pi stacking and pi-T stacking interactions PI_interactions = {} pdb_pistack = PDB() pdb_pi_T = PDB() # "PI-Stacking Interactions ALIVE AND WELL IN PROTEINS" says distance of 7.5 A is good cutoff. This seems really big to me, except that pi-pi interactions (parallel) are actuall usually off centered. Interesting paper. # Note that adenine and tryptophan count as two aromatic rings. So, for example, an interaction between these two, if positioned correctly, could count for 4 pi-pi interactions. #print ligand.aromatic_rings, "****" #print receptor.aromatic_rings,"****" for aromatic1 in ligand.aromatic_rings: #print "dude" for aromatic2 in receptor.aromatic_rings: dist = aromatic1.center.dist_to(aromatic2.center) if dist < 7.5: # so there could be some pi-pi interactions. # first, let's check for stacking interactions. Are the two pi's roughly parallel? aromatic1_norm_vector = point(aromatic1.plane_coeff[0], aromatic1.plane_coeff[1], aromatic1.plane_coeff[2]) aromatic2_norm_vector = point(aromatic2.plane_coeff[0], aromatic2.plane_coeff[1], aromatic2.plane_coeff[2]) angle_between_planes = self.functions.angle_between_points(aromatic1_norm_vector, aromatic2_norm_vector) * 180.0/math.pi if math.fabs(angle_between_planes-0) < 30.0 or math.fabs(angle_between_planes-180) < 30.0: # so they're more or less parallel, it's probably pi-pi stackingoutput_dir # now, pi-pi are not usually right on top of each other. They're often staggared. So I don't want to just look at the centers of the rings and compare. Let's look at each of the atoms. # do atom of the atoms of one ring, when projected onto the plane of the other, fall within that other ring? pi_pi = False # start by assuming it's not a pi-pi stacking interaction for ligand_ring_index in aromatic1.indices: # project the ligand atom onto the plane of the receptor ring pt_on_receptor_plane = self.functions.project_point_onto_plane(ligand.AllAtoms[ligand_ring_index].coordinates, aromatic2.plane_coeff) if pt_on_receptor_plane.dist_to(aromatic2.center) <= aromatic2.radius + pi_padding: pi_pi = True break if pi_pi == False: # if you've already determined it's a pi-pi stacking interaction, no need to keep trying for receptor_ring_index in aromatic2.indices: # project the ligand atom onto the plane of the receptor ring pt_on_ligand_plane = self.functions.project_point_onto_plane(receptor.AllAtoms[receptor_ring_index].coordinates, aromatic1.plane_coeff) if pt_on_ligand_plane.dist_to(aromatic1.center) <= aromatic1.radius + pi_padding: pi_pi = True break if pi_pi == True: structure = receptor.AllAtoms[aromatic2.indices[0]].structure if structure == "": structure = "OTHER" # since it could be interacting with a cofactor or something key = "STACKING_" + structure for index in aromatic1.indices: pdb_pistack.AddNewAtom(ligand.AllAtoms[index].copy_of()) for index in aromatic2.indices: pdb_pistack.AddNewAtom(receptor.AllAtoms[index].copy_of()) self.hashtable_entry_add_one(PI_interactions, key) elif math.fabs(angle_between_planes-90) < 30.0 or math.fabs(angle_between_planes-270) < 30.0: # so they're more or less perpendicular, it's probably a pi-edge interaction #print "dude" # having looked at many structures, I noticed the algorithm was identifying T-pi reactions when the two rings were in fact quite distant, often with other atoms # in between. Eye-balling it, requiring that at their closest they be at least 5 A apart seems to separate the good T's from the bad min_dist = 100.0 for ligand_ind in aromatic1.indices: ligand_at = ligand.AllAtoms[ligand_ind] for receptor_ind in aromatic2.indices: receptor_at = receptor.AllAtoms[receptor_ind] dist = ligand_at.coordinates.dist_to(receptor_at.coordinates) if dist < min_dist: min_dist = dist if min_dist <= 5.0: # so at their closest points, the two rings come within 5 A of each other. # okay, is the ligand pi pointing into the receptor pi, or the other way around? # first, project the center of the ligand pi onto the plane of the receptor pi, and vs. versa # This could be directional somehow, like a hydrogen bond. pt_on_receptor_plane = self.functions.project_point_onto_plane(aromatic1.center, aromatic2.plane_coeff) pt_on_lignad_plane = self.functions.project_point_onto_plane(aromatic2.center, aromatic1.plane_coeff) # now, if it's a true pi-T interaction, this projected point should fall within the ring whose plane it's been projected into. if (pt_on_receptor_plane.dist_to(aromatic2.center) <= aromatic2.radius + pi_padding) or (pt_on_lignad_plane.dist_to(aromatic1.center) <= aromatic1.radius + pi_padding): # so it is in the ring on the projected plane. structure = receptor.AllAtoms[aromatic2.indices[0]].structure if structure == "": structure = "OTHER" # since it could be interacting with a cofactor or something key = "T-SHAPED_" + structure for index in aromatic1.indices: pdb_pi_T.AddNewAtom(ligand.AllAtoms[index].copy_of()) for index in aromatic2.indices: pdb_pi_T.AddNewAtom(receptor.AllAtoms[index].copy_of()) self.hashtable_entry_add_one(PI_interactions, key) # Now identify pi-cation interactions pdb_pi_cat = PDB() for aromatic in receptor.aromatic_rings: for charged in ligand.charges: if charged.positive == True: # so only consider positive charges if charged.coordinates.dist_to(aromatic.center) < 6.0: # distance cutoff based on "Cation-pi interactions in structural biology." # project the charged onto the plane of the aromatic charge_projected = self.functions.project_point_onto_plane(charged.coordinates,aromatic.plane_coeff) if charge_projected.dist_to(aromatic.center) < aromatic.radius + pi_padding: structure = receptor.AllAtoms[aromatic.indices[0]].structure if structure == "": structure = "OTHER" # since it could be interacting with a cofactor or something key = "PI-CATION_LIGAND-CHARGED_" + structure for index in aromatic.indices: pdb_pi_cat.AddNewAtom(receptor.AllAtoms[index].copy_of()) for index in charged.indices: pdb_pi_cat.AddNewAtom(ligand.AllAtoms[index].copy_of()) self.hashtable_entry_add_one(PI_interactions, key) for aromatic in ligand.aromatic_rings: # now it's the ligand that has the aromatic group for charged in receptor.charges: if charged.positive == True: # so only consider positive charges if charged.coordinates.dist_to(aromatic.center) < 6.0: # distance cutoff based on "Cation-pi interactions in structural biology." # project the charged onto the plane of the aromatic charge_projected = self.functions.project_point_onto_plane(charged.coordinates,aromatic.plane_coeff) if charge_projected.dist_to(aromatic.center) < aromatic.radius + pi_padding: structure = receptor.AllAtoms[charged.indices[0]].structure if structure == "": structure = "OTHER" # since it could be interacting with a cofactor or something key = "PI-CATION_RECEPTOR-CHARGED_" + structure for index in aromatic.indices: pdb_pi_cat.AddNewAtom(ligand.AllAtoms[index].copy_of()) for index in charged.indices: pdb_pi_cat.AddNewAtom(receptor.AllAtoms[index].copy_of()) self.hashtable_entry_add_one(PI_interactions, key) # now count the number of salt bridges pdb_salt_bridges = PDB() salt_bridges = {} for receptor_charge in receptor.charges: for ligand_charge in ligand.charges: if ligand_charge.positive != receptor_charge.positive: # so they have oppositve charges if ligand_charge.coordinates.dist_to(receptor_charge.coordinates) < 5.5: # 4 is good cutoff for salt bridges according to "Close-Range Electrostatic Interactions in Proteins", but looking at complexes, I decided to go with 5.5 A structure = receptor.AllAtoms[receptor_charge.indices[0]].structure if structure == "": structure = "OTHER" # since it could be interacting with a cofactor or something key = "SALT-BRIDGE_" + structure for index in receptor_charge.indices: pdb_salt_bridges.AddNewAtom(receptor.AllAtoms[index].copy_of()) for index in ligand_charge.indices: pdb_salt_bridges.AddNewAtom(ligand.AllAtoms[index].copy_of()) self.hashtable_entry_add_one(salt_bridges, key) # Now save the files preface ="REMARK " # if an output directory is specified, and it doesn't exist, create it #if parameters.params['output_dir'] != "": # if not os.path.exists(parameters.params['output_dir']): # os.mkdir(parameters.params['output_dir']) # Now get vina vina_output = getCommandOutput2(parameters.params['vina_executable'] + ' --score_only --receptor ' + receptor_pdbqt_filename + ' --ligand ' + ligand_pdbqt_filename) # if ligand_pdbqt_filename was originally passed as a list instead of a filename, delete the temporary file that was created to accomodate vina #if "MODEL" in ligand_pdbqt_filename: # os.remove(ligand_pdbqt_filename) # print "REMOVE THIS SECTION FOR PRODUCTION!!!" #os.remove(ligand_pdbqt_filename) #print vina_output vina_output = vina_output.split("\n") vina_affinity = 0.0 vina_gauss_1 = 0.0 vina_gauss_2 = 0.0 vina_repulsion = 0.0 vina_hydrophobic = 0.0 vina_hydrogen = 0.0 for item in vina_output: item = item.strip() if "Affinity" in item: vina_affinity = float(item.replace("Affinity: ","").replace(" (kcal/mol)","")) if "gauss 1" in item: vina_gauss_1 = float(item.replace("gauss 1 : ","")) if "gauss 2" in item: vina_gauss_2 = float(item.replace("gauss 2 : ","")) if "repulsion" in item: vina_repulsion = float(item.replace("repulsion : ","")) if "hydrophobic" in item: vina_hydrophobic = float(item.replace("hydrophobic : ","")) if "Hydrogen" in item: vina_hydrogen = float(item.replace("Hydrogen : ","")) vina_output = [vina_affinity, vina_gauss_1, vina_gauss_2, vina_repulsion, vina_hydrophobic, vina_hydrogen] stacking = [] t_shaped = [] pi_cation = [] for key in PI_interactions: value = PI_interactions[key] together = key + "_" + str(value) # not that this is put together strangely!!! if "STACKING" in together: stacking.append(together) if "CATION" in together: pi_cation.append(together) if "SHAPED" in together: t_shaped.append(together) # now create a single descriptor object data = {} data['vina_output'] = vina_output data['ligand_receptor_atom_type_pairs_less_than_two_half'] = ligand_receptor_atom_type_pairs_less_than_two_half data['ligand_receptor_atom_type_pairs_less_than_four'] = ligand_receptor_atom_type_pairs_less_than_four data['ligand_atom_types'] = ligand_atom_types data['ligand_receptor_atom_type_pairs_electrostatic'] = ligand_receptor_atom_type_pairs_electrostatic data['rotateable_bonds_count'] = ligand.rotateable_bonds_count data['active_site_flexibility'] = active_site_flexibility data['hbonds'] = hbonds data['hydrophobics'] = hydrophobics data['stacking'] = stacking data['pi_cation'] = pi_cation data['t_shaped'] = t_shaped data['salt_bridges'] = salt_bridges self.vina_output = data['vina_output'] self.rotateable_bonds_count = {'rot_bonds':data['rotateable_bonds_count']} self.ligand_receptor_atom_type_pairs_less_than_two_half = {"A_A": 0, "A_C": 0, "A_CL": 0, "A_F": 0, "A_FE": 0, "A_MG": 0, "A_MN": 0, "A_NA": 0, "A_SA": 0, "BR_C": 0, "BR_OA": 0, "C_CL": 0, "CD_OA": 0, "CL_FE": 0, "CL_MG": 0, "CL_N": 0, "CL_OA": 0, "CL_ZN": 0, "C_MN": 0, "C_NA": 0, "F_N": 0, "F_SA": 0, "F_ZN": 0, "HD_MN": 0, "MN_N": 0, "NA_SA": 0, "N_SA": 0, "A_HD": 0, "A_N": 0, "A_OA": 0, "A_ZN": 0, "BR_HD": 0, "C_C": 0, "C_F": 0, "C_HD": 0, "CL_HD": 0, "C_MG": 0, "C_N": 0, "C_OA": 0, "C_SA": 0, "C_ZN": 0, "FE_HD": 0, "FE_N": 0, "FE_OA": 0, "F_HD": 0, "F_OA": 0, "HD_HD": 0, "HD_I": 0, "HD_MG": 0, "HD_N": 0, "HD_NA": 0, "HD_OA": 0, "HD_P": 0, "HD_S": 0, "HD_SA": 0, "HD_ZN": 0, "MG_NA": 0, "MG_OA": 0, "MN_OA": 0, "NA_OA": 0, "NA_ZN": 0, "N_N": 0, "N_NA": 0, "N_OA": 0, "N_ZN": 0, "OA_OA": 0, "OA_SA": 0, "OA_ZN": 0, "SA_ZN": 0, "S_ZN": 0} for key in data['ligand_receptor_atom_type_pairs_less_than_two_half']: if not key in self.ligand_receptor_atom_type_pairs_less_than_two_half: print ("\tWARNING: Atoms of types " + key.replace("_"," and ") + " come within 2.5 angstroms of each other.") print ("\t The neural networks were not trained to deal with this juxtaposition,") print ("\t so it will be ignored.") self.error = True else: self.ligand_receptor_atom_type_pairs_less_than_two_half[key] = data['ligand_receptor_atom_type_pairs_less_than_two_half'][key] self.ligand_receptor_atom_type_pairs_less_than_four = {"A_CU": 0, "A_MG": 0, "A_MN": 0, "BR_SA": 0, "C_CD": 0, "CL_FE": 0, "CL_MG": 0, "CL_MN": 0, "CL_NA": 0, "CL_P": 0, "CL_S": 0, "CL_ZN": 0, "CU_HD": 0, "CU_N": 0, "FE_NA": 0, "FE_SA": 0, "MG_N": 0, "MG_S": 0, "MG_SA": 0, "MN_NA": 0, "MN_S": 0, "MN_SA": 0, "NA_P": 0, "P_S": 0, "P_SA": 0, "S_SA": 0, "A_A": 0, "A_BR": 0, "A_C": 0, "A_CL": 0, "A_F": 0, "A_FE": 0, "A_HD": 0, "A_I": 0, "A_N": 0, "A_NA": 0, "A_OA": 0, "A_P": 0, "A_S": 0, "A_SA": 0, "A_ZN": 0, "BR_C": 0, "BR_HD": 0, "BR_N": 0, "BR_OA": 0, "C_C": 0, "C_CL": 0, "C_F": 0, "C_FE": 0, "C_HD": 0, "C_I": 0, "CL_HD": 0, "CL_N": 0, "CL_OA": 0, "CL_SA": 0, "C_MG": 0, "C_MN": 0, "C_N": 0, "C_NA": 0, "C_OA": 0, "C_P": 0, "C_S": 0, "C_SA": 0, "C_ZN": 0, "FE_HD": 0, "FE_N": 0, "FE_OA": 0, "F_HD": 0, "F_N": 0, "F_OA": 0, "F_SA": 0, "HD_HD": 0, "HD_I": 0, "HD_MG": 0, "HD_MN": 0, "HD_N": 0, "HD_NA": 0, "HD_OA": 0, "HD_P": 0, "HD_S": 0, "HD_SA": 0, "HD_ZN": 0, "I_N": 0, "I_OA": 0, "MG_NA": 0, "MG_OA": 0, "MG_P": 0, "MN_N": 0, "MN_OA": 0, "MN_P": 0, "NA_OA": 0, "NA_S": 0, "NA_SA": 0, "NA_ZN": 0, "N_N": 0, "N_NA": 0, "N_OA": 0, "N_P": 0, "N_S": 0, "N_SA": 0, "N_ZN": 0, "OA_OA": 0, "OA_P": 0, "OA_S": 0, "OA_SA": 0, "OA_ZN": 0, "P_ZN": 0, "SA_SA": 0, "SA_ZN": 0, "S_ZN": 0} for key in data['ligand_receptor_atom_type_pairs_less_than_four']: if not key in self.ligand_receptor_atom_type_pairs_less_than_four: print ("\tWARNING: Atoms of types " + key.replace("_"," and ") + " come within 4 angstroms of each other.") print ("\t The neural networks were not trained to deal with this juxtaposition,") print ("\t so it will be ignored.") self.error = True else: self.ligand_receptor_atom_type_pairs_less_than_four[key] = data['ligand_receptor_atom_type_pairs_less_than_four'][key] self.ligand_atom_types = {'A': 0, 'BR': 0, 'C': 0, 'CL': 0, 'F': 0, 'HD': 0, 'I': 0, 'N': 0, 'NA': 0, 'OA': 0, 'P': 0, 'S': 0, 'SA': 0} for key in data['ligand_atom_types']: if not key in self.ligand_atom_types: print ("\tWARNING: The ligand contains an atoms of type " + key + ". The neural networks") print ("\t were not trained to deal with this ligand atom type, so it will be ignored.") self.error = True else: self.ligand_atom_types[key] = data['ligand_atom_types'][key] self.ligand_receptor_atom_type_pairs_electrostatic = {"A_MG": 0, "A_MN": 0, "BR_SA": 0, "CL_FE": 0, "CL_MG": 0, "CL_MN": 0, "CL_NA": 0, "CL_P": 0, "CL_S": 0, "CL_ZN": 0, "CU_HD": 0, "CU_N": 0, "FE_NA": 0, "FE_SA": 0, "MG_N": 0, "MG_S": 0, "MG_SA": 0, "MN_NA": 0, "MN_S": 0, "MN_SA": 0, "NA_P": 0, "P_S": 0, "P_SA": 0, "S_SA": 0, "A_A": 0.0, "A_BR": 0.0, "A_C": 0.0, "A_CL": 0.0, "A_F": 0.0, "A_FE": 0.0, "A_HD": 0.0, "A_I": 0.0, "A_N": 0.0, "A_NA": 0.0, "A_OA": 0.0, "A_P": 0.0, "A_S": 0.0, "A_SA": 0.0, "A_ZN": 0.0, "BR_C": 0.0, "BR_HD": 0.0, "BR_N": 0.0, "BR_OA": 0.0, "C_C": 0.0, "C_CL": 0.0, "C_F": 0.0, "C_FE": 0.0, "C_HD": 0.0, "C_I": 0.0, "CL_HD": 0.0, "CL_N": 0.0, "CL_OA": 0.0, "CL_SA": 0.0, "C_MG": 0.0, "C_MN": 0.0, "C_N": 0.0, "C_NA": 0.0, "C_OA": 0.0, "C_P": 0.0, "C_S": 0.0, "C_SA": 0.0, "C_ZN": 0.0, "FE_HD": 0.0, "FE_N": 0.0, "FE_OA": 0.0, "F_HD": 0.0, "F_N": 0.0, "F_OA": 0.0, "F_SA": 0.0, "HD_HD": 0.0, "HD_I": 0.0, "HD_MG": 0.0, "HD_MN": 0.0, "HD_N": 0.0, "HD_NA": 0.0, "HD_OA": 0.0, "HD_P": 0.0, "HD_S": 0.0, "HD_SA": 0.0, "HD_ZN": 0.0, "I_N": 0.0, "I_OA": 0.0, "MG_NA": 0.0, "MG_OA": 0.0, "MG_P": 0.0, "MN_N": 0.0, "MN_OA": 0.0, "MN_P": 0.0, "NA_OA": 0.0, "NA_S": 0.0, "NA_SA": 0.0, "NA_ZN": 0.0, "N_N": 0.0, "N_NA": 0.0, "N_OA": 0.0, "N_P": 0.0, "N_S": 0.0, "N_SA": 0.0, "N_ZN": 0.0, "OA_OA": 0.0, "OA_P": 0.0, "OA_S": 0.0, "OA_SA": 0.0, "OA_ZN": 0.0, "P_ZN": 0.0, "SA_SA": 0.0, "SA_ZN": 0.0, "S_ZN": 0, "F_ZN": 0} for key in data['ligand_receptor_atom_type_pairs_electrostatic']: if not key in self.ligand_receptor_atom_type_pairs_electrostatic: print ("\tWARNING: Atoms of types " + key.replace("_"," and ") + ", which come within 4 angstroms of each") print ("\t other, may interact electrostatically. However, the neural networks") print ("\t were not trained to deal with electrostatic interactions between atoms") print ("\t of these types, so they will be ignored.") self.error = True else: self.ligand_receptor_atom_type_pairs_electrostatic[key] = data['ligand_receptor_atom_type_pairs_electrostatic'][key] self.active_site_flexibility = {'BACKBONE_ALPHA': 0, 'BACKBONE_BETA': 0, 'BACKBONE_OTHER': 0, 'SIDECHAIN_ALPHA': 0, 'SIDECHAIN_BETA': 0, 'SIDECHAIN_OTHER': 0} for key in data['active_site_flexibility']: self.active_site_flexibility[key] = data['active_site_flexibility'][key] alpha_tmp = self.active_site_flexibility['BACKBONE_ALPHA'] + self.active_site_flexibility['SIDECHAIN_ALPHA'] beta_tmp = self.active_site_flexibility['BACKBONE_BETA'] + self.active_site_flexibility['SIDECHAIN_BETA'] other_tmp = self.active_site_flexibility['BACKBONE_OTHER'] + self.active_site_flexibility['SIDECHAIN_OTHER'] self.active_site_flexibility_by_structure = {'ALPHA':alpha_tmp, 'BETA':beta_tmp, 'OTHER':other_tmp} backbone_tmp = self.active_site_flexibility['BACKBONE_ALPHA'] + self.active_site_flexibility['BACKBONE_BETA'] + self.active_site_flexibility['BACKBONE_OTHER'] sidechain_tmp = self.active_site_flexibility['SIDECHAIN_ALPHA'] + self.active_site_flexibility['SIDECHAIN_BETA'] + self.active_site_flexibility['SIDECHAIN_OTHER'] self.active_site_flexibility_by_backbone_or_sidechain = {'BACKBONE':backbone_tmp, 'SIDECHAIN':sidechain_tmp} all = self.active_site_flexibility['BACKBONE_ALPHA'] + self.active_site_flexibility['BACKBONE_BETA'] + self.active_site_flexibility['BACKBONE_OTHER'] + self.active_site_flexibility['SIDECHAIN_ALPHA'] + self.active_site_flexibility['SIDECHAIN_BETA'] + self.active_site_flexibility['SIDECHAIN_OTHER'] self.active_site_flexibility_all = {'all': all} self.hbonds = {'HDONOR-LIGAND_BACKBONE_ALPHA': 0, 'HDONOR-LIGAND_BACKBONE_BETA': 0, 'HDONOR-LIGAND_BACKBONE_OTHER': 0, 'HDONOR-LIGAND_SIDECHAIN_ALPHA': 0, 'HDONOR-LIGAND_SIDECHAIN_BETA': 0, 'HDONOR-LIGAND_SIDECHAIN_OTHER': 0, 'HDONOR-RECEPTOR_BACKBONE_ALPHA': 0, 'HDONOR-RECEPTOR_BACKBONE_BETA': 0, 'HDONOR-RECEPTOR_BACKBONE_OTHER': 0, 'HDONOR-RECEPTOR_SIDECHAIN_ALPHA': 0, 'HDONOR-RECEPTOR_SIDECHAIN_BETA': 0, 'HDONOR-RECEPTOR_SIDECHAIN_OTHER': 0} for key in data['hbonds']: key2 = key.replace("HDONOR_","HDONOR-") self.hbonds[key2] = data['hbonds'][key] hdonor_ligand = self.hbonds['HDONOR-LIGAND_BACKBONE_ALPHA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_BETA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_OTHER'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_BETA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_OTHER'] hdonor_receptor = self.hbonds['HDONOR-RECEPTOR_BACKBONE_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_BETA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_OTHER'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_BETA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_OTHER'] self.hbonds_by_location_of_hdonor = {'LIGAND':hdonor_ligand, 'RECEPTOR':hdonor_receptor} hbond_backbone = self.hbonds['HDONOR-LIGAND_BACKBONE_ALPHA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_BETA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_OTHER'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_BETA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_OTHER'] hbond_sidechain = self.hbonds['HDONOR-LIGAND_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_BETA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_OTHER'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_BETA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_OTHER'] self.hbonds_by_backbone_or_sidechain = {'BACKBONE':hbond_backbone, 'SIDECHAIN':hbond_sidechain} hbond_alpha = self.hbonds['HDONOR-LIGAND_BACKBONE_ALPHA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_ALPHA'] hbond_beta = self.hbonds['HDONOR-LIGAND_BACKBONE_BETA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_BETA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_BETA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_BETA'] hbond_other = self.hbonds['HDONOR-LIGAND_BACKBONE_OTHER'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_OTHER'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_OTHER'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_OTHER'] self.hbonds_by_structure = {'ALPHA':hbond_alpha, 'BETA':hbond_beta, 'OTHER':hbond_other} all = self.hbonds['HDONOR-LIGAND_BACKBONE_ALPHA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_BETA'] + self.hbonds['HDONOR-LIGAND_BACKBONE_OTHER'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_BETA'] + self.hbonds['HDONOR-LIGAND_SIDECHAIN_OTHER'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_BETA'] + self.hbonds['HDONOR-RECEPTOR_BACKBONE_OTHER'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_ALPHA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_BETA'] + self.hbonds['HDONOR-RECEPTOR_SIDECHAIN_OTHER'] self.hbonds_all = {'all':all} self.hydrophobics = {'BACKBONE_ALPHA': 0, 'BACKBONE_BETA': 0, 'BACKBONE_OTHER': 0, 'SIDECHAIN_ALPHA': 0, 'SIDECHAIN_BETA': 0, 'SIDECHAIN_OTHER': 0} for key in data['hydrophobics']: self.hydrophobics[key] = data['hydrophobics'][key] alpha_tmp = self.hydrophobics['BACKBONE_ALPHA'] + self.hydrophobics['SIDECHAIN_ALPHA'] beta_tmp = self.hydrophobics['BACKBONE_BETA'] + self.hydrophobics['SIDECHAIN_BETA'] other_tmp = self.hydrophobics['BACKBONE_OTHER'] + self.hydrophobics['SIDECHAIN_OTHER'] self.hydrophobics_by_structure = {'ALPHA':alpha_tmp, 'BETA':beta_tmp, 'OTHER':other_tmp} backbone_tmp = self.hydrophobics['BACKBONE_ALPHA'] + self.hydrophobics['BACKBONE_BETA'] + self.hydrophobics['BACKBONE_OTHER'] sidechain_tmp = self.hydrophobics['SIDECHAIN_ALPHA'] + self.hydrophobics['SIDECHAIN_BETA'] + self.hydrophobics['SIDECHAIN_OTHER'] self.hydrophobics_by_backbone_or_sidechain = {'BACKBONE':backbone_tmp, 'SIDECHAIN':sidechain_tmp} all = self.hydrophobics['BACKBONE_ALPHA'] + self.hydrophobics['BACKBONE_BETA'] + self.hydrophobics['BACKBONE_OTHER'] + self.hydrophobics['SIDECHAIN_ALPHA'] + self.hydrophobics['SIDECHAIN_BETA'] + self.hydrophobics['SIDECHAIN_OTHER'] self.hydrophobics_all = {'all':all} stacking_tmp = {} for item in data['stacking']: item = item.split("_") stacking_tmp[item[1]] = int(item[2]) self.stacking = {'ALPHA': 0, 'BETA': 0, 'OTHER': 0} for key in stacking_tmp: self.stacking[key] = stacking_tmp[key] all = self.stacking['ALPHA'] + self.stacking['BETA'] + self.stacking['OTHER'] self.stacking_all = {'all': all} pi_cation_tmp = {} for item in data['pi_cation']: item = item.split("_") pi_cation_tmp[item[1] + "_" + item[2]] = int(item[3]) self.pi_cation = {'LIGAND-CHARGED_ALPHA': 0, 'LIGAND-CHARGED_BETA': 0, 'LIGAND-CHARGED_OTHER': 0, 'RECEPTOR-CHARGED_ALPHA': 0, 'RECEPTOR-CHARGED_BETA': 0, 'RECEPTOR-CHARGED_OTHER': 0} for key in pi_cation_tmp: self.pi_cation[key] = pi_cation_tmp[key] pi_cation_ligand_charged = self.pi_cation['LIGAND-CHARGED_ALPHA'] + self.pi_cation['LIGAND-CHARGED_BETA'] + self.pi_cation['LIGAND-CHARGED_OTHER'] pi_cation_receptor_charged = self.pi_cation['RECEPTOR-CHARGED_ALPHA'] + self.pi_cation['RECEPTOR-CHARGED_BETA'] + self.pi_cation['RECEPTOR-CHARGED_OTHER'] self.pi_cation_charge_location = {'LIGAND':pi_cation_ligand_charged, 'RECEPTOR':pi_cation_receptor_charged} pi_cation_alpha = self.pi_cation['LIGAND-CHARGED_ALPHA'] + self.pi_cation['RECEPTOR-CHARGED_ALPHA'] pi_cation_beta = self.pi_cation['LIGAND-CHARGED_BETA'] + self.pi_cation['RECEPTOR-CHARGED_BETA'] pi_cation_other = self.pi_cation['LIGAND-CHARGED_OTHER'] + self.pi_cation['RECEPTOR-CHARGED_OTHER'] self.pi_cation_by_structure = {'ALPHA':pi_cation_alpha, 'BETA':pi_cation_beta, "OTHER":pi_cation_other} all = self.pi_cation['LIGAND-CHARGED_ALPHA'] + self.pi_cation['LIGAND-CHARGED_BETA'] + self.pi_cation['LIGAND-CHARGED_OTHER'] + self.pi_cation['RECEPTOR-CHARGED_ALPHA'] + self.pi_cation['RECEPTOR-CHARGED_BETA'] + self.pi_cation['RECEPTOR-CHARGED_OTHER'] self.pi_cation_all = {'all': all} t_shaped_tmp = {} for item in data['t_shaped']: item = item.split("_") t_shaped_tmp[item[1]] = int(item[2]) self.t_shaped = {'ALPHA': 0, 'BETA': 0, 'OTHER': 0} for key in t_shaped_tmp: self.t_shaped[key] = t_shaped_tmp[key] all = self.t_shaped['ALPHA'] + self.t_shaped['BETA'] + self.t_shaped['OTHER'] self.t_shaped_all = {'all': all} self.salt_bridges = {'ALPHA': 0, 'BETA': 0, 'OTHER': 0} for key in data['salt_bridges']: key2 = key.replace("SALT-BRIDGE_","") self.salt_bridges[key2] = data['salt_bridges'][key] all = self.salt_bridges['ALPHA'] + self.salt_bridges['BETA'] + self.salt_bridges['OTHER'] self.salt_bridges_all = {'all': all} self.input_vector = [] self.input_vector.extend(self.vina_output) # a list self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.ligand_receptor_atom_type_pairs_less_than_four) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.ligand_receptor_atom_type_pairs_electrostatic) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.ligand_atom_types) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.ligand_receptor_atom_type_pairs_less_than_two_half) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.hbonds) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.hydrophobics) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.stacking) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.pi_cation) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.t_shaped) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.active_site_flexibility) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.salt_bridges) self.input_vector = self.extend_list_by_dictionary(self.input_vector, self.rotateable_bonds_count) class command_line_parameters: params = {} def __init__(self, parameters): global vina_executable # first, set defaults self.params['receptor'] = '' self.params['ligand'] = '' self.params['vina_executable'] = vina_executable self.params['check_vina_version'] = "TRUE" # TRUE by default, but setting to false will speed up execution. Good when rescoring many poses. # now get user inputed values for index in range(len(parameters)): item = parameters[index] if item[:1] == '-': # so it's a parameter key value key = item.replace('-','').lower() if key == "help": print ("INTRODUCTION") print ("============\n") print (textwrap.fill("NNScore 2.01 (NNScore2.py) is a python script for predicting the binding affinity of receptor-ligand complexes. It is particularly well suited for rescoring ligands that have been docked using AutoDock Vina.") + "\n") print ("REQUIREMENTS") print ("============\n") print (textwrap.fill("Python: NNScore 2.01 has been tested using Python versions 2.6.5, 2.6.1, and 2.5.2 on Ubuntu 10.04.1 LTS, Mac OS X 10.6.8, and Windows XP Professional, respectively. A copy of the Python interpreter can be downloaded from http://www.python.org/getit/.") + "\n") print (textwrap.fill("AutoDock Vina 1.1.2: NNScore 2.01 uses AutoDock Vina 1.1.2 to obtain some information about the receptor-ligand complex. Note that previous versions of AutoDock Vina are not suitble. AutoDock Vina 1.1.2 can be downloaded from http://vina.scripps.edu/download.html.") + "\n") print (textwrap.fill("MGLTools: As receptor and ligand inputs, NNScore 2.01 accepts models in the PDBQT format. Files in the more common PDB format can be converted to the PDBQT format using scripts included in MGLTools (prepare_receptor4.py and prepare_ligand4.py). MGLTools can be obtained from http://mgltools.scripps.edu/downloads.") + "\n") print ("COMMAND-LINE PARAMETERS") print ("=======================\n") print (textwrap.fill("-receptor: File name of the receptor PDBQT file.") + "\n") print (textwrap.fill("-ligand: File name of the ligand PDBQT file. AutoDock Vina output files, typically containing multiple poses, are also permitted.") + "\n") print (textwrap.fill("-vina_executable: The location of the AutoDock Vina 1.1.2 executable. If you don't wish to specify the location of this file every time you use NNScore 2.01, simply edit the vina_executable variable defined near the begining of the NNScore2.py script.") + "\n") print ("PROGRAM OUTPUT") print ("==============\n") print (textwrap.fill("NNScore 2.01 evaluates each of the ligand poses contained in the file specified by the -ligand tag using 20 distinct neural-network scoring functions. The program then seeks to identify which of the poses has the highest predicted affinity using several metrics:") + "\n") print (textwrap.fill("1) Each of the 20 networks are considered separately. The poses are ranked in 20 different ways by the scores assigned by each network.") + "\n") print (textwrap.fill("2) The poses are ranked by the best score given by any of the 20 networks.") + "\n") print (textwrap.fill("3) The poses are ranked by the average of the scores given by the 20 networks. This is the recommended metric.") + "\n") sys.exit(0) value = parameters[index+1] if key in self.params.keys(): self.params[key] = value parameters[index] = "" parameters[index + 1] = "" if self.okay_to_proceed() == True: print ("Command-line parameters used:") print ("\tReceptor: " + self.params["receptor"]) print ("\tLigand: " + self.params["ligand"]) print ("\tVina executable: " + self.params["vina_executable"] + "\n") # make a list of all the command-line parameters not used error = "" for index in range(1,len(parameters)): item = parameters[index] if item != "": error = error + item + " " if error != "": print ("WARNING: The following command-line parameters were not used:") print ("\t" + error + "\n") # do a check to make sure the autodock vina is version 1.1.2 if self.params["check_vina_version"] == "TRUE": vina_version_output = "" if not os.path.exists(self.params['vina_executable']): vina_version_output = "" else: try: vina_version_output = getCommandOutput2(self.params['vina_executable'] + ' --version') except: vina_version_output = "" if not " 1.1.2 " in vina_version_output: print ("ERROR: NNScore 2.01 is designed to work with AutoDock Vina 1.1.2.\nOther versions of AutoDock may not work properly. Please download\nAutoDock Vina 1.1.2 from http://vina.scripps.edu/download.html.\n") print ("Once this executable is downloaded, you can use the -vina_executable\ntag to indicate its location. Alternatively, you can simply modify\nthe vina_executable variable defined near the beginning of\nthe NNScore2.py file.\n") sys.exit(0) def okay_to_proceed(self): if self.params['receptor'] != '' and self.params['ligand'] != '' and self.params['vina_executable'] != '': return True else: return False def score_to_kd(score): kd = math.pow(10,-score) if score <= 0: return "Kd = " + str(round(kd,2)) + " M" temp = kd * pow(10,3) if temp >=1 and temp <1000: return "Kd = " + str(round(temp,2)) + " mM" temp = temp * pow(10,3) if temp >=1 and temp <1000: return "Kd = " + str(round(temp,2)) + " uM" temp = temp * pow(10,3) if temp >=1 and temp <1000: return "Kd = " + str(round(temp,2)) + " nM" temp = temp * pow(10,3) if temp >=1 and temp <1000: return "Kd = " + str(round(temp,2)) + " pM" temp = temp * pow(10,3) #if temp >=1 and temp <1000: return "Kd = " + str(round(temp,2)) + " fM" #return "Kd = " + str(kd) + " M" print ("\nNNScore 2.01") print ("============\n") print (textwrap.fill("NNScore 2.01 is released under the GNU General Public License (see http://www.gnu.org/licenses/gpl.html). If you have any questions, comments, or suggestions, please contact the author, Jacob Durrant, at jdurrant [at] ucsd [dot] edu. If you use NNScore 2.01 in your work, please cite [REFERENCE HERE].")) print ("") print (textwrap.fill("NNScore 2.01 is based in part on the ffnet python package developed by Marek Wojciechowski (http://ffnet.sourceforge.net/).")) print ("") print ("Use the -help command-line parameter for extended help.\n") print ("Example: python NNScore2.py -receptor myreceptor.pdbqt -ligand myligand.pdbqt -vina_executable /PATH/TO/VINA/1.1.2/vina\n") cmd_params = command_line_parameters(sys.argv[:]) if cmd_params.okay_to_proceed() == False: print ("ERROR: You need to specify the ligand and receptor PDBQT files, as\nwell as the full path the the AutoDock Vina 1.1.2 executable, using the\n-receptor, -ligand, and -vina_executable tags from the command line.\nThe -ligand tag can also specify an AutoDock Vina output file.\n") sys.exit(0) lig = cmd_params.params['ligand'] rec = cmd_params.params['receptor'] def calculate_score(lig, rec, cmd_params, actual_filename_if_lig_is_list="", actual_filename_if_rec_is_list="", line_header = ""): d = binana(lig, rec, cmd_params, line_header, actual_filename_if_lig_is_list, actual_filename_if_rec_is_list) #Save Binana Descriptors (this was added, is extra from the lab) #f=open('binana_descriptors.csv','ab') #numpy.savetxt(f,numpy.array(actual_filename_if_lig_is_list[0:4],dtype=str).reshape(1,), fmt="%s") #numpy.savetxt(f,descriptors[None],delimiter=',') #f.close() #f = open("input_vector.pickle", "wb") ##pickle.dump(d, f) #f.close() #sys.exit() # now load in neural networks scores = [] total = 0.0 #nets = networks() nets = [] with open("dl_networks_03/networks.pickle", "rb") as pickle_file: nets = pickle.load(pickle_file) output_dict = {} # Save the pdb id f = re.findall('\w\w\w\w/', actual_filename_if_lig_is_list) pdb_id = f[len(f)-1].strip('/') output_dict[0] = pdb_id for net_array in nets: try: net = ffnet() net.load(net_array) val = net.normcall(d.input_vector) output_dict[len(output_dict)] = val print (line_header + "Network #" + str(len(scores) + 1) + " gave a score of " + str( np.round(val,3)) + " (" + score_to_kd(val) + ")") scores.append(val) total = total + val except OverflowError: print (line_header + "The output of network #" + str(len(scores) + 1) + " could not be determined because of an overflow error!") try: # Session and memory management config = K.tf.ConfigProto() config.gpu_options.allow_growth = True #config.gpu_options.visible_device_list = "0" K.set_session(K.tf.Session(config=config)) # Data processing input_data = np.array(d.input_vector) try: with open("dl_networks_03/transform.pickle", "rb") as f: transform = pickle.load(f) except FileNotFoundError: print("File transform.pickle not found") input_data = (input_data - transform['mean'])/ transform['std'] #with open("nonzero_column_indices.pickle", "rb") as f: # non_zero_columns = pickle.load(f) # Filter the zero columns out # input_data = input_data[non_zero_columns[:-1]] # ignoring the last column (y-value) # Load the neural networks count = 1 max_value = 0.0 print("\nNEURAL NETWORKS: ") for dl_net in dl_nets(): val = dl_net.predict(input_data.reshape(1, -1))[0][0] #val = dl_net.predict(input_data.values.reshape(1, -1))[0][0] max_value = np.maximum(max_value, val) print('\tNetwork # ', str(count), ' Score: ', val, ' (', score_to_kd(val), ')') count = count + 1 output_dict[len(output_dict)] = val # Delete the model and clear the session. del dl_net K.clear_session() print("Max value from DLSCORE: ", max_value) # Save the predicted value in a csv if not os.path.isdir('output'): os.mkdir('output') with open('output/' + pdb_id + '_predicted_values.csv', 'a') as f: w = csv.DictWriter(f, output_dict.keys()) #w.writeheader() w.writerow(output_dict) #print (line_header + "Network #" + str(len(scores) + 1) + " gave a score of " + str( np.round(val,3)) + " (" + score_to_kd(val) + ")") except IOError: print("Could not load the network file") if len(scores) != 0: average = total / len(scores) best_score = 0.0 best_network = -1 count = 1 sum = 0.0 for score in scores: if score > best_score: best_score = score best_network = count sum = sum + math.pow(score - average,2) count = count + 1 stdev = math.pow(sum / (len(scores)-1), 0.5) print ("") print (line_header + "Best Score: ", np.round(best_score,3), "(" + score_to_kd(best_score) + ")") print (line_header + "Average Score: ", np.round(average,3), "(" + score_to_kd(average) + ")") print (line_header + "Standard Deviation: ", round(stdev,3)) print ("") return [average, stdev, best_score, best_network, scores] else: print (line_header + "Could not compute the score of this receptor-ligand complex because none of the networks returned a valid score.") return [0, 0, 0, 0, []] # load the rec into an array so you only have to load it from the disk once print ("\nLOADING THE RECEPTOR") print ("====================\n") #f2 = open(rec,'r') #rec_array = f2.readlines() #f2.close() receptor = PDB() receptor.LoadPDB_from_file(rec) receptor.OrigFileName = rec print ("\nEVALUATING EACH OF THE POSES IN THE LIGAND FILE USING 20 TRAINED NEURAL NETWORKS") print ("================================================================================\n") # determine if the ligand input file is a single pdbqt or an autodock vina output file. Both are acceptable inputs. f = open(lig,'r') lig_array = [] line = "NULL" scores = [] model_id = 1 while len(line) != 0: line = f.readline() if line[:6] != "ENDMDL": lig_array.append(line) if line[:6] == "ENDMDL" or len(line) == 0: if len(lig_array) != 0 and lig_array != ['']: temp_filename = lig + ".MODEL_" + str(model_id) + ".pdbqt" temp_f = open(temp_filename, 'w') for ln in lig_array: temp_f.write(ln) temp_f.close() model_name = "MODEL " + str(model_id) print (model_name) score=calculate_score(lig_array, receptor, cmd_params, temp_filename, rec, "\t") scores.append([score[0], score[1], score[2], score[3], score[4], model_name]) os.remove(temp_filename) lig_array = [] model_id = model_id + 1 f.close() # now find the best score across all models, for each network print ("\nRANKED POSES AND SCORES WHEN EACH OF THE 20 NETWORKS IS CONSIDERED SEPARATELY") print ("=============================================================================\n") best_network_scores = [] for t in range(20): #for t in range(21): net_scores = [] for score in scores: net_scores.append((score[4][t],score[5])) net_scores = sorted(net_scores,key=lambda net_score: net_score[0], reverse=True) # sort by score print (("USING NETWORK #" + str(t+1)).center(42," ")) print (" Rank | Pose | Score | Predicted Kd ") print ("------+----------+-------+--------------") count = 1 best_network_scores.append(net_scores[0]) for net_score in net_scores: print (str(count).center(6," ") + "|" + net_score[1].center(10," ") + "|" + str(round(net_score[0],3)).center(7," ") + "|" + score_to_kd(net_score[0]).replace("Kd = ","").center(14," ")) count = count + 1 print ("") print ("\nRANKED POSES AND SCORES WHEN A CONSENSUS OF NETWORK OUTPUTS ARE CONSIDERED") print ("==========================================================================\n") # now get the best score of all vina output poses best_scores = sorted(scores,key=lambda score: score[2], reverse=True) # sort by score best_best_score = best_scores[0] print ("BEST SCORE OF ALL 20 NETWORKS, BY POSE".center(65," ")) print (" Rank | Pose | Average Score | Predicted Kd | Network") print ("------+----------+---------------+--------------+---------") count = 1 for score in best_scores: print (str(count).center(6," ") + "|" + score[5].center(10," ") + "|" + str(np.round(score[2],3)).center(15," ") + "|" + score_to_kd(score[2]).replace("Kd = ","").center(14," ") + "|" + ("#" + str(score[3])).center(9," ")) count = count + 1 print ("") # now get the best average score of all vina output poses average_scores = sorted(scores,key=lambda score: score[0], reverse=True) # sort by score best_of_average_scores = average_scores[0] print ("AVERAGE SCORE OF ALL 20 NETWORKS, BY POSE".center(70," ")) print (" Rank | Pose | Average Score | Standard Deviation | Predicted Kd") print ("------+----------+---------------+--------------------+--------------") count = 1 for score in average_scores: print (str(count).center(6," ") + "|" + score[5].center(10," ") + "|" + str(np.round(score[0],3)).center(15," ") + "|" + str(np.round(score[1],3)).center(20," ") + "|" + score_to_kd(score[0]).replace("Kd = ","").center(15," ")) count = count + 1 print ("") print ("\nSUMMARY") print ("=======\n") count = 1 for best_network_score in best_network_scores: print (textwrap.fill("Best pose scored by network #" + str(count) + ": " + best_network_score[1] + " (Score = " + str(np.round(best_network_score[0],3)) + " = " + score_to_kd(best_network_score[0]).replace("Kd = ","") + ")")) count = count + 1 print ("") print (textwrap.fill("When the poses were ranked by the best of the 21 network scores associated with each pose, the best-scoring pose was " + best_best_score[5] + " (Score = " + str(np.round(best_best_score[2],3)) + " = " + score_to_kd(best_best_score[2]).replace("Kd = ","") + ")")) print ("") print (textwrap.fill("When the poses were ranked by the average of the 21 network scores associated with each pose, the best-scoring pose was " + best_of_average_scores[5] + " (Score = " + str(np.round(best_of_average_scores[0],3)) + " +/- " + str(np.round(best_of_average_scores[1],3)) + " = " + score_to_kd(best_of_average_scores[0]).replace("Kd = ","") + "). This is the recommended ranking/scoring metric.")) print ("")