""" PDB parsing class This module parses PDBs in accordance to PDB Format Description Version 2.2 (1996); it is not very forgiving. Each class in this module corresponds to a record in the PDB Format Description. Much of the documentation for the classes is taken directly from the above PDB Format Description. ---------------------------- PDB2PQR -- An automated pipeline for the setup, execution, and analysis of Poisson-Boltzmann electrostatics calculations Copyright (c) 2002-2010, Jens Erik Nielsen, University College Dublin; Nathan A. Baker, Washington University in St. Louis; Paul Czodrowski & Gerhard Klebe, University of Marburg All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of University College Dublin, Washington University in St. Louis, or University of Marburg nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ---------------------------- """ __date__ = "4 August 2008" __author__ = "Todd Dolinsky, Yong Huang" import string, sys import copy ### PC class END: """ END class The END records are paired with MODEL records to group individual structures found in a coordinate entry. """ def __init__(self, line): """ Initialize by parsing line (nothing to do) """ pass class MASTER: """ MASTER class The MASTER record is a control record for bookkeeping. It lists the number of lines in the coordinate entry or file for selected record types. 
""" def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ------------------------------------------------- 11-15 int numRemark Number of REMARK records 21-25 int numHet Number of HET records 26-30 int numHelix Number of HELIX records 31-35 int numSheet Number of SHEET records 36-40 int numTurn Number of TURN records 41-45 int numSite Number of SITE records 46-50 int numXform Number of coordinate transformation records (ORIGX+SCALE+MTRIX) 51-55 int numCoord Number of atomic coordinate records (ATOM+HETATM) 56-60 int numTer Number of TER records 61-65 int numConect Number of CONECT records 66-70 int numSeq Number of SEQRES records """ record = string.strip(line[0:6]) if record == "MASTER": self.numRemark = int(string.strip(line[10:15])) self.numHet = int(string.strip(line[20:25])) self.numHelix = int(string.strip(line[25:30])) self.numSheet = int(string.strip(line[30:35])) self.numTurn = int(string.strip(line[35:40])) self.numSite = int(string.strip(line[40:45])) self.numXform = int(string.strip(line[45:50])) self.numCoord = int(string.strip(line[50:55])) self.numTer = int(string.strip(line[55:60])) self.numConect = int(string.strip(line[60:65])) self.numSeq = int(string.strip(line[65:70])) else: raise ValueError, record class CONECT: """ CONECT class The CONECT records specify connectivity between atoms for which coordinates are supplied. The connectivity is described using the atom serial number as found in the entry. CONECT records are mandatory for HET groups (excluding water) and for other bonds not specified in the standard residue connectivity table which involve atoms in standard residues (see Appendix 4 for the list of standard residues). These records are generated by the PDB. 
""" def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION -------------------------------------------- 7-11 int serial Atom serial number 12-16 int serial1 Serial number of bonded atom 17-21 int serial2 Serial number of bonded atom 22-26 int serial3 Serial number of bonded atom 27-31 int serial4 Serial number of bonded atom 32-36 int serial5 Serial number of hydrogen bonded atom 37-41 int serial6 Serial number of hydrogen bonded atom 42-46 int serial7 Serial number of salt bridged atom 47-51 int serial8 Serial number of hydrogen bonded atom 52-56 int serial9 Serial number of hydrogen bonded atom 57-61 int serial10 Serial number of salt bridged atom """ record = string.strip(line[0:6]) if record == "CONECT": self.serial = int(string.strip(line[6:11])) try: self.serial1 = int(string.strip(line[11:16])) except ValueError: self.serial1 = None try: self.serial2 = int(string.strip(line[16:21])) except ValueError: self.serial2 = None try: self.serial3 = int(string.strip(line[21:26])) except ValueError: self.serial3 = None try: self.serial4 = int(string.strip(line[26:31])) except ValueError: self.serial4 = None try: self.serial5 = int(string.strip(line[31:36])) except ValueError: self.serial5 = None try: self.serial6 = int(string.strip(line[36:41])) except ValueError: self.serial6 = None try: self.serial7 = int(string.strip(line[41:46])) except ValueError: self.serial7 = None try: self.serial8 = int(string.strip(line[46:51])) except ValueError: self.serial8 = None try: self.serial9 = int(string.strip(line[51:56])) except ValueError: self.serial9 = None try: self.serial10 = int(string.strip(line[56:61])) except ValueError: self.serial10 = None else: raise ValueError, record class ENDMDL: """ ENDMDL class The ENDMDL records are paired with MODEL records to group individual structures found in a coordinate entry. 
""" def __init__(self, line): """ Initialize by parsing line (nothing to do) """ pass class TER: """ TER class The TER record indicates the end of a list of ATOM/HETATM records for a chain. """ def __init__(self, line): """ Initialize by parsing line: COLUMNS TYPE FIELD DEFINITION ------------------------------------------- 7-11 int serial Serial number. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Insertion code. """ record = string.strip(line[0:6]) if record == "TER": try: # Not really needed self.serial = int(string.strip(line[6:11])) self.resName = string.strip(line[17:20]) self.chainID = string.strip(line[21]) self.resSeq = int(string.strip(line[22:26])) self.iCode = string.strip(line[26]) except (IndexError, ValueError): self.serial = None self.resName = None self.chainID = None self.resSeq = None self.iCode = None else: raise ValueError, record class SIGUIJ: """ SIGUIJ class The SIGUIJ records present the anisotropic temperature factors. """ def __init__(self, line): """ Initialize by parsing line: COLUMNS TYPE FIELD DEFINITION ------------------------------------------------------ 7-11 int serial Atom serial number. 13-16 string name Atom name. 17 string altLoc Alternate location indicator. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Insertion code. 29-35 int sig11 Sigma U(1,1) 36-42 int sig22 Sigma U(2,2) 43-49 int sig33 Sigma U(3,3) 50-56 int sig12 Sigma U(1,2) 57-63 int sig13 Sigma U(1,3) 64-70 int sig23 Sigma U(2,3) 73-76 string segID Segment identifier, left-justified. 77-78 string element Element symbol, right-justified. 79-80 string charge Charge on the atom. 
""" record = string.strip(line[0:6]) if record == "SIGUIJ": self.serial = int(string.strip(line[6:11])) self.name = string.strip(line[12:16]) self.altLoc = string.strip(line[16]) self.resName = string.strip(line[17:20]) self.chainID = string.strip(line[21]) self.resSeq = int(string.strip(line[22:26])) self.iCode = string.strip(line[26]) self.sig11 = int(string.strip(line[28:35])) self.sig22 = int(string.strip(line[35:42])) self.sig33 = int(string.strip(line[42:49])) self.sig12 = int(string.strip(line[49:56])) self.sig13 = int(string.strip(line[56:63])) self.sig23 = int(string.strip(line[63:70])) self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) else: raise ValueError, record class ANISOU: """ ANISOU class The ANISOU records present the anisotropic temperature factors. """ def __init__(self, line): """ Initialize by parsing line: COLUMNS TYPE FIELD DEFINITION ------------------------------------------------------ 7-11 int serial Atom serial number. 13-16 string name Atom name. 17 string altLoc Alternate location indicator. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Insertion code. 29-35 int u00 U(1,1) 36-42 int u11 U(2,2) 43-49 int u22 U(3,3) 50-56 int u01 U(1,2) 57-63 int u02 U(1,3) 64-70 int u12 U(2,3) 73-76 string segID Segment identifier, left-justified. 77-78 string element Element symbol, right-justified. 79-80 string charge Charge on the atom. 
""" record = string.strip(line[0:6]) if record == "ANISOU": self.serial = int(string.strip(line[6:11])) self.name = string.strip(line[12:16]) self.altLoc = string.strip(line[16]) self.resName = string.strip(line[17:20]) self.chainID = string.strip(line[21]) self.resSeq = int(string.strip(line[22:26])) self.iCode = string.strip(line[26]) self.u00 = int(string.strip(line[28:35])) self.u11 = int(string.strip(line[35:42])) self.u22 = int(string.strip(line[42:49])) self.u01 = int(string.strip(line[49:56])) self.u02 = int(string.strip(line[56:63])) self.u12 = int(string.strip(line[63:70])) self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) else: raise ValueError, record class SIGATM: """ SIGATM class The SIGATM records present the standard deviation of atomic parameters as they appear in ATOM and HETATM records. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION --------------------------------------------- 7-11 int serial Atom serial number. 13-16 string name Atom name. 17 string altLoc Alternate location indicator. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Code for insertion of residues. 31-38 float sigX Standard devition of orthogonal coordinates for X in Angstroms. 39-46 float sigY Standard devition of orthogonal coordinates for Y in Angstroms. 47-54 float sigZ Standard devition of orthogonal coordinates for Z in Angstroms. 55-60 float sigOcc Standard devition of occupancy. 61-66 float sigTemp Standard devition of temperature factor. 73-76 string segID Segment identifier, left-justified. 77-78 string element Element symbol, right-justified. 79-80 string charge Charge on the atom. 
""" record = string.strip(line[0:6]) if record == "HETATM": self.serial = int(string.strip(line[6:11])) self.name = string.strip(line[12:16]) self.altLoc = string.strip(line[16]) self.resName = string.strip(line[17:20]) self.chainID = string.strip(line[21]) self.resSeq = int(string.strip(line[22:26])) self.iCode = string.strip(line[26]) self.sigX = float(string.strip(line[30:38])) self.sigY = float(string.strip(line[38:46])) self.sigZ = float(string.strip(line[46:54])) self.sigOcc = float(string.strip(line[54:60])) self.sigTemp = float(string.strip(line[60:66])) self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) else: raise ValueError, record class HETATM: """ HETATM class The HETATM records present the atomic coordinate records for atoms within "non-standard" groups. These records are used for water molecules and atoms presented in HET groups. """ def __init__(self,line,sybylType="A.aaa",lBonds=[],lBondedAtoms=[]): ### PC """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION --------------------------------------------- 7-11 int serial Atom serial number. 13-16 string name Atom name. 17 string altLoc Alternate location indicator. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Code for insertion of residues. 31-38 float x Orthogonal coordinates for X in Angstroms. 39-46 float y Orthogonal coordinates for Y in Angstroms. 47-54 float z Orthogonal coordinates for Z in Angstroms. 55-60 float occupancy Occupancy. 61-66 float tempFactor Temperature factor. 73-76 string segID Segment identifier, left-justified. 77-78 string element Element symbol, right-justified. 79-80 string charge Charge on the atom. 
""" record = string.strip(line[0:6]) if record == "HETATM": self.serial = int(string.strip(line[6:11])) self.name = string.strip(line[12:16]) self.altLoc = string.strip(line[16]) try: self.resName = string.strip(line[17:20]) self.chainID = string.strip(line[21]) self.resSeq = int(string.strip(line[22:26])) self.iCode = string.strip(line[26]) except: raise ValueError, 'Residue name must be less than 4 characters!' self.x = float(string.strip(line[30:38])) self.y = float(string.strip(line[38:46])) self.z = float(string.strip(line[46:54])) ### PC # self.lAtoms = lAtoms self.sybylType = sybylType self.lBondedAtoms = lBondedAtoms self.lBonds = lBonds self.radius = 1.0 self.isCterm=0 self.isNterm=0 ### try: self.occupancy = float(string.strip(line[54:60])) self.tempFactor = float(string.strip(line[60:66])) self.segID = string.strip(line[72:76]) self.element = string.strip(line[76:78]) self.charge = string.strip(line[78:80]) except ValueError, IndexError: self.occupancy = 0.00 self.tempFactor = 0.00 self.segID = "" self.element = "" self.charge = "" else: raise ValueError, record def __str__(self): """ Print object as string COLUMNS TYPE FIELD DEFINITION --------------------------------------------- 7-11 int serial Atom serial number. 13-16 string name Atom name. 17 string altLoc Alternate location indicator. 18-20 string resName Residue name. 22 string chainID Chain identifier. 23-26 int resSeq Residue sequence number. 27 string iCode Code for insertion of residues. 31-38 float x Orthogonal coordinates for X in Angstroms. 39-46 float y Orthogonal coordinates for Y in Angstroms. 47-54 float z Orthogonal coordinates for Z in Angstroms. 55-60 float occupancy Occupancy. 61-66 float tempFactor Temperature factor. 73-76 string segID Segment identifier, left-justified. 77-78 string element Element symbol, right-justified. 79-80 string charge Charge on the atom. 
""" str = "" tstr = "HETATM" str = str + string.ljust(tstr, 6)[:6] tstr = "%d" % self.serial str = str + string.rjust(tstr, 5)[:5] str = str + " " tstr = self.name if len(tstr) == 4: str = str + string.ljust(tstr, 4)[:4] else: str = str + " " + string.ljust(tstr, 3)[:3] tstr = self.altLoc str = str + string.ljust(tstr, 1)[:1] tstr = self.resName str = str + string.ljust(tstr, 3)[:3] str = str + " " tstr = self.chainID str = str + string.ljust(tstr, 1)[:1] tstr = "%d" % self.resSeq str = str + string.rjust(tstr, 4)[:4] tstr = self.iCode str = str + string.ljust(tstr, 1)[:1] str = str + " " tstr = "%8.3f" % self.x str = str + string.ljust(tstr, 8)[:8] tstr = "%8.3f" % self.y str = str + string.ljust(tstr, 8)[:8] tstr = "%8.3f" % self.z str = str + string.ljust(tstr, 8)[:8] tstr = "%6.2f" % self.occupancy str = str + string.ljust(tstr, 6)[:6] tstr = "%6.2f" % self.tempFactor str = str + string.rjust(tstr, 6)[:6] tstr = self.segID str = str + string.ljust(tstr, 4)[:4] tstr = self.element str = str + string.ljust(tstr, 2)[:2] tstr = self.charge str = str + string.ljust(tstr, 2)[:2] return str ### PC # to do: - parse SUBSTRUCTURE # - avoid/detect blanks in @<TRIPOS>BOND # - what happens, if no SUBSTRUCTURE present? 
#         - different order of SUBSTRUCTURE/MOLECULE
#         - readlines instead of read -> blanks are avoided (you get a list)
#         - (maybe) flag for parsing each RTI


def _optionalInt(field):
    """
        Return int(field), or None when field is blank.

        Non-blank text that is not an integer still raises ValueError.
    """
    field = field.strip()
    if not field:
        return None
    return int(field)


def _charAt(line, index):
    """
        Return the stripped character at position index of line, or None
        when the line is too short to have that column.
    """
    if index < len(line):
        return line[index].strip()
    return None


def _givenFlag(line):
    """
        Return the MTRIXn iGiven flag (column 60) as an int, or None when
        the column is blank, missing or not an integer.
    """
    try:
        return int(line[59].strip())
    except (IndexError, ValueError):
        return None


class MOL2BOND:
    """
        Bonding of MOL2 files
    """

    def __init__(self, frm, to, type, id=0):
        """
            Store one bond.

            Parameters
                frm:  bond from atom
                to:   bond to this atom
                type: 1=single, 2=double, ar=aromatic
                id:   bond_id (default 0)
        """
        self.to = to
        self.frm = frm
        self.type = type
        self.id = id


class MOL2MOLECULE:
    """
        Tripos MOL2 molecule
        For further information look at (web page exists: 25 August 2005):
        http://www.tripos.com/index.php?family=modules,SimplePage,,,&page=sup_mol2&s=0
    """

    def __init__(self):
        self.lAtoms = []     # all atoms of class <ATOM>
        self.lBonds = []     # all bonds of class <BOND>
        self.lPDBAtoms = []  # PDB-like list of all atoms

    @staticmethod
    def _sectionLines(data, start, stop):
        """
            Return the lines of data[start:stop] that follow the first line.

            start is expected to be the offset of a "@<TRIPOS>..." tag, so
            the first line (the tag itself) is dropped.  Nothing is trimmed
            from the end of the section; the trailing empty string left by
            the final newline is skipped by the parse methods.
        """
        return data[start:stop].split("\n")[1:]

    def read(self, file):
        """
            Routines for reading MOL2 file

            Parameters
                file: object whose read() returns the MOL2 text

            Fills self.lAtoms, self.lPDBAtoms and self.lBonds.  Raises
            Exception if the ATOM, BOND or SUBSTRUCTURE section tag is
            missing or an entry is malformed.
        """
        data = file.read()
        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        atomStart = data.find("@<TRIPOS>ATOM")
        bondStart = data.find("@<TRIPOS>BOND")
        if atomStart == -1:
            raise Exception("Unable to find '@<TRIPOS>ATOM' in MOL2 file!")
        elif bondStart == -1:
            raise Exception("Unable to find '@<TRIPOS>BOND' in MOL2 file!")

        subStart = data.find("@<TRIPOS>SUBSTRUCTURE")
        if subStart == -1:
            raise Exception(
                "Unable to find '@<TRIPOS>SUBSTRUCTURE' in MOL2 file!")

        # The ATOM section used to be sliced as data[start+14:stop-2], which
        # cut the last character off the final atom line.
        atoms = self._sectionLines(data, atomStart, bondStart)
        bonds = self._sectionLines(data, bondStart, subStart)

        self.parseAtoms(atoms)
        self.parseBonds(bonds)
        self.createlBondedAtoms()

    def parseAtoms(self, AtomList):
        """
            for parsing @<TRIPOS>ATOM

            Each non-blank line is split on whitespace and needs at least
            eight fields; the fields are rewritten into a HETATM-style line
            (chain "L") that is parsed by the HETATM class.  A ninth field,
            when present, is stored as the atom's mol2charge.

            Raises Exception for a line with fewer than eight fields or with
            non-numeric id/coordinates/residue number.
        """
        # Four blanks after the residue number put x in columns 31-38,
        # which is where HETATM reads it.
        pdbFormat = ("%s%5i%5s%4s%2s%4i" + " " * 4 + "%8.3f%8.3f%8.3f")
        fakeRecord = "HETATM"
        fakeChain = " L"
        for AtomLine in AtomList:
            fields = AtomLine.split()
            # Special handling for blank lines
            if len(fields) == 0:
                continue
            # Error checking
            if len(fields) < 8:
                raise Exception(
                    "Bad atom entry in MOL2 file: %s" % AtomLine)
            try:
                mol2pdb = pdbFormat % (
                    fakeRecord, int(fields[0]), fields[1], fields[7][:4],
                    fakeChain, int(fields[6]),
                    float(fields[2]), float(fields[3]), float(fields[4]))
            except ValueError:
                raise Exception(
                    "Bad atom entry in MOL2 file: %s" % AtomLine)
            thisAtom = HETATM(mol2pdb, fields[5], [], [])
            if len(fields) > 8:
                charge = fields[8]
                try:
                    thisAtom.mol2charge = float(charge)
                except ValueError:
                    print('Warning. Non-float charge in mol2 file. %s'
                          % charge)
                    thisAtom.mol2charge = None
            self.lPDBAtoms.append(mol2pdb)
            self.lAtoms.append(thisAtom)

    def parseBonds(self, BondList):
        """
            for parsing @<TRIPOS>BOND

            Each non-blank line must hold at least "id from to type"; one
            MOL2BOND per line is appended to self.lBonds.  Raises Exception
            for a short line or non-integer id/from/to.
        """
        for BondLine in BondList:
            fields = BondLine.split()
            # Special handling for blank lines
            if len(fields) == 0:
                continue
            if len(fields) < 4:
                raise Exception(
                    "Bad bond entry in MOL2 file: %s" % BondLine)
            try:
                thisBond = MOL2BOND(
                    int(fields[1]),  # bond frm
                    int(fields[2]),  # bond to
                    fields[3],       # bond type
                    int(fields[0])   # bond id
                )
            except ValueError:
                raise Exception(
                    "Bad bond entry in MOL2 file: %s" % BondLine)
            self.lBonds.append(thisBond)

    def createlBondedAtoms(self):
        """
            Creates for each atom a list of the bonded Atoms

            For every bond, each end atom gets the other atom appended to
            its lBondedAtoms and a deep copy of the bond (with other_atom
            set to the partner) appended to its lBonds.  Bond atom numbers
            are used as 1-based indexes into self.lAtoms.
        """
        for bond in self.lBonds:
            fromAtom = self.lAtoms[bond.frm - 1]
            toAtom = self.lAtoms[bond.to - 1]
            fromAtom.lBondedAtoms.append(toAtom)
            toAtom.lBondedAtoms.append(fromAtom)

            atbond = copy.deepcopy(bond)
            atbond.other_atom = toAtom
            fromAtom.lBonds.append(atbond)

            atbond = copy.deepcopy(bond)
            atbond.other_atom = fromAtom
            toAtom.lBonds.append(atbond)
        return

    def createPDBlineFromMOL2(self):
        """
            Format a HETATM-style line (chain "L") from atom fields on self.

            NOTE(review): this reads self.serial, self.name, self.resName,
            self.resSeq, self.x, self.y and self.z, none of which are set by
            __init__ or read() of this class -- presumably left over from a
            per-atom class; confirm before calling.
        """
        FakeType = "HETATM"
        # Three blanks after the 5-wide residue number put x in column 31.
        pdbFormat = ("%s%5i%5s%4s%2s%5s" + " " * 3 + "%8.3f%8.3f%8.3f\n")
        return pdbFormat % (FakeType, self.serial, self.name, self.resName,
                            ' L', self.resSeq, self.x, self.y, self.z)
### PC


class ATOM:
    """
        ATOM class

        The ATOM records present the atomic coordinates for standard
        residues. They also present the occupancy and temperature factor for
        each atom. Heterogen coordinates use the HETATM record type. The
        element symbol is always present on each ATOM record; segment
        identifier and charge are optional.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD      DEFINITION
            ---------------------------------------------
             7-11    int    serial     Atom serial number.
            13-16    string name       Atom name.
            17       string altLoc     Alternate location indicator.
            18-20    string resName    Residue name.
            22       string chainID    Chain identifier.
            23-26    int    resSeq     Residue sequence number.
            27       string iCode      Code for insertion of residues.
            31-38    float  x          Orthogonal coordinates for X in
                                       Angstroms.
            39-46    float  y          Orthogonal coordinates for Y in
                                       Angstroms.
            47-54    float  z          Orthogonal coordinates for Z in
                                       Angstroms.
            55-60    float  occupancy  Occupancy.
            61-66    float  tempFactor Temperature factor.
            73-76    string segID      Segment identifier, left-justified.
            77-78    string element    Element symbol, right-justified.
            79-80    string charge     Charge on the atom.

            If the occupancy or temperature factor cannot be parsed, the
            fields from column 55 onwards fall back to 0.00 / "".

            Raises ValueError if the record name is not ATOM or a required
            numeric field is malformed; IndexError if the line ends before
            column 27.
        """
        record = line[0:6].strip()
        if record != "ATOM":
            raise ValueError(record)
        self.serial = int(line[6:11].strip())
        self.name = line[12:16].strip()
        self.altLoc = line[16].strip()
        self.resName = line[17:20].strip()
        self.chainID = line[21].strip()
        self.resSeq = int(line[22:26].strip())
        self.iCode = line[26].strip()
        self.x = float(line[30:38].strip())
        self.y = float(line[38:46].strip())
        self.z = float(line[46:54].strip())
        try:
            self.occupancy = float(line[54:60].strip())
            self.tempFactor = float(line[60:66].strip())
            self.segID = line[72:76].strip()
            self.element = line[76:78].strip()
            self.charge = line[78:80].strip()
        except (ValueError, IndexError):
            self.occupancy = 0.00
            self.tempFactor = 0.00
            self.segID = ""
            self.element = ""
            self.charge = ""

    def __str__(self):
        """
            Print object as string

            COLUMNS  TYPE   FIELD      DEFINITION
            ---------------------------------------------
             7-11    int    serial     Atom serial number.
            13-16    string name       Atom name.
            17       string altLoc     Alternate location indicator.
            18-20    string resName    Residue name.
            22       string chainID    Chain identifier.
            23-26    int    resSeq     Residue sequence number.
            27       string iCode      Code for insertion of residues.
            31-38    float  x          Orthogonal coordinates for X in
                                       Angstroms.
            39-46    float  y          Orthogonal coordinates for Y in
                                       Angstroms.
            47-54    float  z          Orthogonal coordinates for Z in
                                       Angstroms.
            55-60    float  occupancy  Occupancy.
            61-66    float  tempFactor Temperature factor.

            NOTE: segID, element and charge are written immediately after
            the temperature factor (no padding for columns 67-72), so they
            do not land in the columns that __init__ reads them from.
        """
        name = self.name
        if len(name) == 4:
            nameField = name.ljust(4)[:4]
        else:
            # Shorter names start in column 14.
            nameField = " " + name.ljust(3)[:3]
        fields = [
            "ATOM".ljust(6)[:6],
            ("%d" % self.serial).rjust(5)[:5],
            " ",
            nameField,
            self.altLoc.ljust(1)[:1],
            self.resName.ljust(3)[:3],
            " ",
            self.chainID.ljust(1)[:1],
            ("%d" % self.resSeq).rjust(4)[:4],
            self.iCode.ljust(1)[:1],
            " " * 3,  # columns 28-30, so that x starts in column 31
            ("%8.3f" % self.x).ljust(8)[:8],
            ("%8.3f" % self.y).ljust(8)[:8],
            ("%8.3f" % self.z).ljust(8)[:8],
            ("%6.2f" % self.occupancy).ljust(6)[:6],
            ("%6.2f" % self.tempFactor).ljust(6)[:6],
            self.segID.ljust(4)[:4],
            self.element.ljust(2)[:2],
            self.charge.ljust(2)[:2],
        ]
        return "".join(fields)


class MODEL:
    """
        MODEL class

        The MODEL record specifies the model serial number when multiple
        structures are presented in a single coordinate entry, as is often
        the case with structures determined by NMR.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            -----------------------------------------------------
            11-14    int    serial Model serial number.

            Raises ValueError if the record name is not MODEL or the serial
            number is not an integer.
        """
        record = line[0:6].strip()
        if record != "MODEL":
            raise ValueError(record)
        self.serial = int(line[10:14].strip())


class TVECT:
    """
        TVECT class

        The TVECT records present the translation vector for infinite
        covalently connected structures.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
             8-10    int    serial Serial number
            11-20    float  t1     Components of translation vector
            21-30    float  t2     Components of translation vector
            31-40    float  t3     Components of translation vector
            41-70    string text   Comments

            Raises ValueError if the record name is not TVECT or a numeric
            field is malformed.
        """
        record = line[0:6].strip()
        if record != "TVECT":
            raise ValueError(record)
        self.serial = int(line[7:10].strip())
        self.t1 = float(line[10:20].strip())
        self.t2 = float(line[20:30].strip())
        self.t3 = float(line[30:40].strip())
        self.text = line[40:70].strip()


class MTRIX3:
    """
        MTRIX3 class

        The MTRIXn (n = 1, 2, or 3) records present transformations
        expressing non-crystallographic symmetry.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
             8-10    int    serial Serial number
            11-20    float  mn1    M31
            21-30    float  mn2    M32
            31-40    float  mn3    M33
            46-55    float  vn     V3
            60       int    iGiven 1 if coordinates for the representations
                                   which are approximately related by the
                                   transformations of the molecule are
                                   contained in the entry. Otherwise, blank.

            iGiven is None when column 60 is blank or missing.  Raises
            ValueError if the record name is not MTRIX3 or a numeric field
            is malformed.
        """
        record = line[0:6].strip()
        if record != "MTRIX3":
            raise ValueError(record)
        self.serial = int(line[7:10].strip())
        self.mn1 = float(line[10:20].strip())
        self.mn2 = float(line[20:30].strip())
        self.mn3 = float(line[30:40].strip())
        self.vn = float(line[45:55].strip())
        self.iGiven = _givenFlag(line)


class MTRIX2:
    """
        MTRIX2 class

        The MTRIXn (n = 1, 2, or 3) records present transformations
        expressing non-crystallographic symmetry.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
             8-10    int    serial Serial number
            11-20    float  mn1    M21
            21-30    float  mn2    M22
            31-40    float  mn3    M23
            46-55    float  vn     V2
            60       int    iGiven 1 if coordinates for the representations
                                   which are approximately related by the
                                   transformations of the molecule are
                                   contained in the entry. Otherwise, blank.

            iGiven is None when column 60 is blank or missing.  Raises
            ValueError if the record name is not MTRIX2 or a numeric field
            is malformed.
        """
        record = line[0:6].strip()
        if record != "MTRIX2":
            raise ValueError(record)
        self.serial = int(line[7:10].strip())
        self.mn1 = float(line[10:20].strip())
        self.mn2 = float(line[20:30].strip())
        self.mn3 = float(line[30:40].strip())
        self.vn = float(line[45:55].strip())
        self.iGiven = _givenFlag(line)


class MTRIX1:
    """
        MTRIX1 class

        The MTRIXn (n = 1, 2, or 3) records present transformations
        expressing non-crystallographic symmetry.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
             8-10    int    serial Serial number
            11-20    float  mn1    M11
            21-30    float  mn2    M12
            31-40    float  mn3    M13
            46-55    float  vn     V1
            60       int    iGiven 1 if coordinates for the representations
                                   which are approximately related by the
                                   transformations of the molecule are
                                   contained in the entry. Otherwise, blank.

            iGiven is None when column 60 is blank or missing.  Raises
            ValueError if the record name is not MTRIX1 or a numeric field
            is malformed.
        """
        record = line[0:6].strip()
        if record != "MTRIX1":
            raise ValueError(record)
        self.serial = int(line[7:10].strip())
        self.mn1 = float(line[10:20].strip())
        self.mn2 = float(line[20:30].strip())
        self.mn3 = float(line[30:40].strip())
        self.vn = float(line[45:55].strip())
        # This used to re-read columns 46-55 (the vn field), so iGiven was
        # practically always None.
        self.iGiven = _givenFlag(line)


class SCALE3:
    """
        SCALE3 class

        The SCALEn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates as contained in the entry to fractional
        crystallographic coordinates. Non-standard coordinate systems should
        be explained in the remarks.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  sn1    S31
            21-30    float  sn2    S32
            31-40    float  sn3    S33
            46-55    float  un     U3

            Raises ValueError if the record name is not SCALE3 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "SCALE3":
            raise ValueError(record)
        self.sn1 = float(line[10:20].strip())
        self.sn2 = float(line[20:30].strip())
        self.sn3 = float(line[30:40].strip())
        self.un = float(line[45:55].strip())


class SCALE2:
    """
        SCALE2 class

        The SCALEn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates as contained in the entry to fractional
        crystallographic coordinates. Non-standard coordinate systems should
        be explained in the remarks.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  sn1    S21
            21-30    float  sn2    S22
            31-40    float  sn3    S23
            46-55    float  un     U2

            Raises ValueError if the record name is not SCALE2 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "SCALE2":
            raise ValueError(record)
        self.sn1 = float(line[10:20].strip())
        self.sn2 = float(line[20:30].strip())
        self.sn3 = float(line[30:40].strip())
        self.un = float(line[45:55].strip())


class SCALE1:
    """
        SCALE1 class

        The SCALEn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates as contained in the entry to fractional
        crystallographic coordinates. Non-standard coordinate systems should
        be explained in the remarks.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  sn1    S11
            21-30    float  sn2    S12
            31-40    float  sn3    S13
            46-55    float  un     U1

            Raises ValueError if the record name is not SCALE1 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "SCALE1":
            raise ValueError(record)
        self.sn1 = float(line[10:20].strip())
        self.sn2 = float(line[20:30].strip())
        self.sn3 = float(line[30:40].strip())
        self.un = float(line[45:55].strip())


class ORIGX2:
    """
        ORIGX2 class

        The ORIGXn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates contained in the entry to the submitted
        coordinates.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  on1    O21
            21-30    float  on2    O22
            31-40    float  on3    O23
            46-55    float  tn     T2

            Raises ValueError if the record name is not ORIGX2 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "ORIGX2":
            raise ValueError(record)
        self.on1 = float(line[10:20].strip())
        self.on2 = float(line[20:30].strip())
        self.on3 = float(line[30:40].strip())
        self.tn = float(line[45:55].strip())


class ORIGX3:
    """
        ORIGX3 class

        The ORIGXn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates contained in the entry to the submitted
        coordinates.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  on1    O31
            21-30    float  on2    O32
            31-40    float  on3    O33
            46-55    float  tn     T3

            Raises ValueError if the record name is not ORIGX3 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "ORIGX3":
            raise ValueError(record)
        self.on1 = float(line[10:20].strip())
        self.on2 = float(line[20:30].strip())
        self.on3 = float(line[30:40].strip())
        self.tn = float(line[45:55].strip())


class ORIGX1:
    """
        ORIGX1 class

        The ORIGXn (n = 1, 2, or 3) records present the transformation from
        the orthogonal coordinates contained in the entry to the submitted
        coordinates.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------
            11-20    float  on1    O11
            21-30    float  on2    O12
            31-40    float  on3    O13
            46-55    float  tn     T1

            Raises ValueError if the record name is not ORIGX1 or a field is
            not a float.
        """
        record = line[0:6].strip()
        if record != "ORIGX1":
            raise ValueError(record)
        self.on1 = float(line[10:20].strip())
        self.on2 = float(line[20:30].strip())
        self.on3 = float(line[30:40].strip())
        self.tn = float(line[45:55].strip())


class CRYST1:
    """
        CRYST1 class

        The CRYST1 record presents the unit cell parameters, space group,
        and Z value. If the structure was not determined by crystallographic
        means, CRYST1 simply defines a unit cube.
    """

    def __init__(self, line):
        """
            Initialize by parsing line

            COLUMNS  TYPE   FIELD  DEFINITION
            ---------------------------------------
             7-15    float  a      a (Angstroms).
            16-24    float  b      b (Angstroms).
            25-33    float  c      c (Angstroms).
            34-40    float  alpha  alpha (degrees).
            41-47    float  beta   beta (degrees).
            48-54    float  gamma  gamma (degrees).
            56-66    string sGroup Space group.
            67-70    int    z      Z value.

            Raises ValueError if the record name is not CRYST1 or a numeric
            field is malformed.
        """
        record = line[0:6].strip()
        if record != "CRYST1":
            raise ValueError(record)
        self.a = float(line[6:15].strip())
        self.b = float(line[15:24].strip())
        self.c = float(line[24:33].strip())
        self.alpha = float(line[33:40].strip())
        self.beta = float(line[40:47].strip())
        self.gamma = float(line[47:54].strip())
        # Columns 56-66 inclusive; the slice used to stop one column early.
        self.sGroup = line[55:66].strip()
        self.z = int(line[66:70].strip())


class SITE:
    """
        SITE class

        The SITE records supply the identification of groups comprising
        important sites in the macromolecule.
    """

    def __init__(self, line):
        """
            Initialize by parsing the line

            COLUMNS  TYPE   FIELD    DEFINITION
            --------------------------------------------------------------
             8-10    int    seqNum   Sequence number.
            12-14    string siteID   Site name.
            16-17    int    numRes   Number of residues comprising site.
            19-21    string resName1 Residue name for first residue
                                     comprising site.
            23       string chainID1 Chain identifier for first residue
                                     comprising site.
            24-27    int    seq1     Residue sequence number for first
                                     residue comprising site.
            28       string iCode1   Insertion code for first residue
                                     comprising site.
            30-32    string resName2 Residue name for second residue
                                     comprising site.
            34       string chainID2 Chain identifier for second residue
                                     comprising site.
            35-38    int    seq2     Residue sequence number for second
                                     residue comprising site.
            39       string iCode2   Insertion code for second residue
                                     comprising site.
            41-43    string resName3 Residue name for third residue
                                     comprising site.
            45       string chainID3 Chain identifier for third residue
                                     comprising site.
            46-49    int    seq3     Residue sequence number for third
                                     residue comprising site.
            50       string iCode3   Insertion code for third residue
                                     comprising site.
            52-54    string resName4 Residue name for fourth residue
                                     comprising site.
            56       string chainID4 Chain identifier for fourth residue
                                     comprising site.
            57-60    int    seq4     Residue sequence number for fourth
                                     residue comprising site.
            61       string iCode4   Insertion code for fourth residue
                                     comprising site.

            A record may list fewer than four residues.  For an unused
            slot seqN is None, and chainIDN / iCodeN are None when the line
            ends before their column.

            Raises ValueError if the record name is not SITE or a non-blank
            numeric field is not an integer.
        """
        record = line[0:6].strip()
        if record != "SITE":
            raise ValueError(record)
        self.seqNum = int(line[7:10].strip())
        self.siteID = line[11:14].strip()
        self.numRes = int(line[15:17].strip())
        # The four residue slots share one 11-column layout, starting at
        # columns 19, 30, 41 and 52.
        for slot, base in zip((1, 2, 3, 4), (18, 29, 40, 51)):
            setattr(self, "resName%d" % slot, line[base:base + 3].strip())
            setattr(self, "chainID%d" % slot, _charAt(line, base + 4))
            setattr(self, "seq%d" % slot,
                    _optionalInt(line[base + 5:base + 9]))
            setattr(self, "iCode%d" % slot, _charAt(line, base + 9))
""" record = string.strip(line[0:6]) if record == "CISPEP": self.serNum = int(string.strip(line[7:10])) self.pep1 = string.strip(line[11:14]) self.chainID1 = string.strip(line[15]) self.seqNum1 = int(string.strip(line[17:21])) self.icode1 = string.strip(line[21]) self.pep2 = string.strip(line[25:28]) self.chainID2 = string.strip(line[29]) self.seqNum2 = int(string.strip(line[31:35])) self.icode2 = string.strip(line[35]) self.modNum = int(string.strip(line[43:46])) self.measure = float(string.strip(line[53:59])) else: raise ValueError, record class SLTBRG: """ SLTBRG field The SLTBRG records specify salt bridges in the entry. records and is provided here for convenience in searching. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 13-16 string name1 Atom name. 17 string altLoc1 Alternate location indicator. 18-20 string resName1 Residue name. 22 string chainID1 Chain identifier. 23-26 int resSeq1 Residue sequence number. 27 string iCode1 Insertion code. 43-46 string name2 Atom name. 47 string altLoc2 Alternate location indicator. 48-50 string resName2 Residue name. 52 string chainID2 Chain identifier. 53-56 int resSeq2 Residue sequence number. 57 string iCode2 Insertion code. 60-65 string sym1 Symmetry operator for 1st atom. 67-72 string sym2 Symmetry operator for 2nd atom. 
""" record = string.strip(line[0:6]) if record == "SLTBRG": self.name1 = string.strip(line[12:16]) self.altLoc1 = string.strip(line[16]) self.resName1 = string.strip(line[17:20]) self.chainID1 = string.strip(line[21]) self.resSeq1 = int(string.strip(line[22:26])) self.iCode1 = string.strip(line[26]) self.name2 = string.strip(line[42:46]) self.altLoc2 = string.strip(line[46]) self.resName2 = string.strip(line[47:50]) self.chainID2 = string.strip(line[51]) self.resSeq2 = int(string.strip(line[52:56])) self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) else: raise ValueError, record class HYDBND: """ HYDBND field The HYDBND records specify hydrogen bonds in the entry. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------------- 13-16 string name1 Atom name. 17 string altLoc1 Alternate location indicator. 18-20 string resName1 Residue name. 22 string Chain1 Chain identifier. 23-27 int resSeq1 Residue sequence number. 28 string ICode1 Insertion code. 30-33 string nameH Hydrogen atom name. 34 string altLocH Alternate location indicator. 36 string ChainH Chain identifier. 37-41 int resSeqH Residue sequence number. 42 string iCodeH Insertion code. 44-47 string name2 Atom name. 48 string altLoc2 Alternate location indicator. 49-51 string resName2 Residue name. 53 string chainID2 Chain identifier. 54-58 int resSeq2 Residue sequence number. 59 string iCode2 Insertion code. 60-65 string sym1 Symmetry operator for 1st non-hydrogen atom. 67-72 string sym2 Symmetry operator for 2nd non-hydrogen atom. 
""" record = string.strip(line[0:6]) if record == "HYDBND": self.name1 = string.strip(line[12:16]) self.altLoc1 = string.strip(line[16]) self.resName1 = string.strip(line[17:20]) self.Chain1 = string.strip(line[21]) self.resSeq1 = string.strip(line[22:27]) self.ICode1 = string.strip(line[27]) self.nameH = string.strip(line[29:33]) self.altLocH = string.strip(line[33]) self.ChainH = string.strip(line[35]) self.resSeqH = string.strip(line[36:41]) self.ICodeH = string.strip(line[41]) self.name2 = string.strip(line[43:47]) self.altLoc2 = string.strip(line[47]) self.resName2 = string.strip(line[48:51]) self.Chain2 = string.strip(line[52]) self.resSeq2 = string.strip(line[53:58]) self.ICode2 = string.strip(line[58]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) else: raise ValueError, record class LINK: """ LINK field The LINK records specify connectivity between residues that is not implied by the primary structure. Connectivity is expressed in terms of the atom names. This record supplements information given in CONECT records and is provided here for convenience in searching. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 13-16 string name1 Atom name. 17 string altLoc1 Alternate location indicator. 18-20 string resName1 Residue name. 22 string chainID1 Chain identifier. 23-26 int resSeq1 Residue sequence number. 27 string iCode1 Insertion code. 43-46 string name2 Atom name. 47 string altLoc2 Alternate location indicator. 48-50 string resName2 Residue name. 52 string chainID2 Chain identifier. 53-56 int resSeq2 Residue sequence number. 57 string iCode2 Insertion code. 60-65 string sym1 Symmetry operator for 1st atom. 67-72 string sym2 Symmetry operator for 2nd atom. 
""" record = string.strip(line[0:6]) if record == "LINK": self.name1 = string.strip(line[12:16]) self.altLoc1 = string.strip(line[16]) self.resName1 = string.strip(line[17:20]) self.chainID1 = string.strip(line[21]) self.resSeq1 = int(string.strip(line[22:26])) self.iCode1 = string.strip(line[26]) self.name2 = string.strip(line[42:46]) self.altLoc2 = string.strip(line[46]) self.resName2 = string.strip(line[47:50]) self.chainID2 = string.strip(line[51]) self.resSeq2 = int(string.strip(line[52:56])) self.iCode2 = string.strip(line[56]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) else: raise ValueError, record class SSBOND: """ SSBOND field The SSBOND record identifies each disulfide bond in protein and polypeptide structures by identifying the two residues involved in the bond. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 8 - 10 int serNum Serial number. 16 string chainID1 Chain identifier. 18 - 21 int seqNum1 Residue sequence number. 22 string icode1 Insertion code. 30 string chainID2 Chain identifier. 32 - 35 int seqNum2 Residue sequence number. 36 string icode2 Insertion code. 60 - 65 string sym1 Symmetry operator for 1st residue. 67 - 72 string sym2 Symmetry operator for 2nd residue. """ record = string.strip(line[0:6]) if record == "SSBOND": self.serNum = int(string.strip(line[7:10])) self.chainID1 = string.strip(line[15]) self.seqNum1 = int(string.strip(line[17:21])) self.icode1 = string.strip(line[21]) self.chainID2 = string.strip(line[29]) self.seqNum2 = int(string.strip(line[31:35])) self.icode2 = string.strip(line[35]) self.sym1 = string.strip(line[59:65]) self.sym2 = string.strip(line[66:72]) else: raise ValueError, record class TURN: """ TURN field The TURN records identify turns and other short loop turns which normally connect other secondary structure segments. 
""" def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION --------------------------------------------------------- 8-10 int seq Turn number; starts with 1 and increments by one. 12-14 string turnId Turn identifier 16-18 string initResName Residue name of initial residue in turn. 20 string initChainId Chain identifier for the chain containing this turn. 21-24 int initSeqNum Sequence number of initial residue in turn. 25 string initICode Insertion code of initial residue in turn. 27-29 string endResName Residue name of terminal residue of turn. 31 string endChainId Chain identifier for the chain containing this turn. 32-35 int endSeqNum Sequence number of terminal residue of turn. 36 string endICode Insertion code of terminal residue of turn. 41-70 string comment Associated comment. """ record = string.strip(line[0:6]) if record == "TURN": self.seq = int(string.strip(line[7:10])) self.turnId = string.strip(line[11:14]) self.initResName = string.strip(line[15:18]) self.initChainId = string.strip(line[19]) self.initSeqNum = int(string.strip(line[20:24])) self.initICode = string.strip(line[24]) self.endResName = string.strip(line[26:29]) self.endChainId = string.strip(line[30]) self.endSeqNum = int(string.strip(line[31:35])) self.endICode = string.strip(line[35]) self.comment = string.strip(line[40:70]) else: raise ValueError, record class SHEET: """ SHEET field SHEET records are used to identify the position of sheets in the molecule. Sheets are both named and numbered. The residues where the sheet begins and ends are noted. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ------------------------------------------------- 8 - 10 int strand Strand number which starts at 1 for each strand within a sheet and increases by one. 12 - 14 string sheetID Sheet identifier. 15 - 16 int numStrands Number of strands in sheet. 18 - 20 string initResName Residue name of initial residue. 
22 string initChainID Chain identifier of initial residue in strand. 23 - 26 int initSeqNum Sequence number of initial residue in strand. 27 string initICode Insertion code of initial residue in strand. 29 - 31 string endResName Residue name of terminal residue. 33 string endChainID Chain identifier of terminal residue. 34 - 37 int endSeqNum Sequence number of terminal residue. 38 string endICode Insertion code of terminal residue. 39 - 40 int sense Sense of strand with respect to previous strand in the sheet. 0 if first strand, 1 if parallel, -1 if anti-parallel. 42 - 45 string curAtom Registration. Atom name in current strand. 46 - 48 string curResName Registration. Residue name in current strand. 50 string curChainId Registration. Chain identifier in current strand. 51 - 54 int curResSeq Registration. Residue sequence number in current strand. 55 string curICode Registration. Insertion code in current strand. 57 - 60 string prevAtom Registration. Atom name in previous strand. 61 - 63 string prevResName Registration. Residue name in previous strand. 65 string prevChainId Registration. Chain identifier in previous strand. 66 - 69 int prevResSeq Registration. Residue sequence number in previous strand. 70 string prevICode Registration. Insertion code in previous strand. 
""" record = string.strip(line[0:6]) if record == "SHEET": self.strand = int(string.strip(line[7:10])) self.sheetID = string.strip(line[11:14]) self.numStrands = int(string.strip(line[14:16])) self.initResName = string.strip(line[17:20]) self.initChainID = string.strip(line[21]) self.initSeqNum = int(string.strip(line[22:26])) self.initICode = string.strip(line[26]) self.endResName = string.strip(line[28:31]) self.endChainID = string.strip(line[32]) self.endSeqNum = int(string.strip(line[33:37])) self.endICode = string.strip(line[37]) self.sense = int(string.strip(line[38:40])) try: self.curAtom = string.strip(line[41:45]) self.curResName = string.strip(line[45:48]) self.curChainID = string.strip(line[49]) try: self.curResSeq = int(string.strip(line[50:54])) except ValueError: self.curResSeq = None self.curICode = string.strip(line[54]) self.prevAtom = string.strip(line[56:60]) self.prevResName = string.strip(line[60:63]) self.prevChainID = string.strip(line[64]) try: self.prevResSeq = int(string.strip(line[65:69])) except ValueError: self.prevResSeq = None self.prevICode = string.strip(line[69]) except IndexError: self.curAtom = None self.curResName = None self.curChainID = None self.curResSeq = None self.curICode = None self.prevAtom = None self.prevResName = None self.prevChainID = None self.prevResSeq = None self.prevICode = None else: raise ValueError, record class HELIX: """ HELIX field HELIX records are used to identify the position of helices in the molecule. Helices are both named and numbered. The residues where the helix begins and ends are noted, as well as the total length. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ------------------------------------------------------ 8-10 int serNum Serial number of the helix. This starts at 1 and increases incrementally. 12-14 string helixID Helix identifier. In addition to a serial number, each helix is given an alphanumeric character helix identifier. 
16-18 string initResName Name of the initial residue. 20 string initChainID Chain identifier for the chain containing this helix. 22-25 int initSeqNum Sequence number of the initial residue. 26 string initICode Insertion code of the initial residue. 28-30 string endResName Name of the terminal residue of the helix. 32 string endChainID Chain identifier for the chain containing this helix. 34-37 int endSeqNum Sequence number of the terminal residue. 38 string endICode Insertion code of the terminal residue. 39-40 int helixClass Helix class (see below). 41-70 string comment Comment about this helix. 72-76 int length Length of this helix. """ record = string.strip(line[0:6]) if record == "HELIX": self.serNum = int(string.strip(line[7:10])) self.helixID = string.strip(line[11:14]) self.initResName = string.strip(line[15:18]) self.initChainID = string.strip(line[19]) self.initSeqNum = int(string.strip(line[21:25])) self.initICode = string.strip(line[25]) self.endResName = string.strip(line[27:30]) self.endChainID = string.strip(line[31]) self.endSeqNum = int(string.strip(line[33:37])) self.endICode = string.strip(line[37]) try: self.helixClass = int(string.strip(line[38:40])) except ValueError: self.helixClass = None self.comment = string.strip(line[40:70]) try: self.length = int(string.strip(line[71:76])) except ValueError: self.length = None else: raise ValueError, record class FORMUL: """ FORMUL field The FORMUL record presents the chemical formula and charge of a non-standard group. 
""" def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 9-10 int compNum Component number 13-15 string hetID Het identifier 19 string asterisk * for water 20-70 string text Chemical formula """ record = string.strip(line[0:6]) if record == "FORMUL": self.compNum = int(string.strip(line[8:10])) self.hetID = string.strip(line[12:15]) self.asterisk = string.strip(line[19]) self.text = string.strip(line[19:70]) else: raise ValueError, record class HETSYN: """ HETSYN field This record provides synonyms, if any, for the compound in the corresponding (i.e., same hetID) HETNAM record. This is to allow greater flexibility in searching for HET groups. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 12-14 string hetID Het identifier, right-justified. 16-70 string hetSynonyms List of synonyms """ record = string.strip(line[0:6]) if record == "HETSYN": self.hetID = string.strip(line[11:14]) self.hetSynonyms = string.strip(line[15:70]) else: raise ValueError, record class HETNAM: """ HETNAM field This record gives the chemical name of the compound with the given hetID. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 12-14 string hetID Het identifier, right-justified. 16-70 string text Chemical name. """ record = string.strip(line[0:6]) if record == "HETNAM": self.hetID = string.strip(line[11:14]) self.text = string.strip(line[15:70]) else: raise ValueError, record class HET: """ HET field HET records are used to describe non-standard residues, such as prosthetic groups, inhibitors, solvent molecules, and ions for which coordinates are supplied. 
Groups are considered HET if they are: - not one of the standard amino acids, and - not one of the nucleic acids (C, G, A, T, U, and I), and - not one of the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), and - not an unknown amino acid or nucleic acid where UNK is used to indicate the unknown residue name. Het records also describe heterogens for which the chemical identity is unknown, in which case the group is assigned the hetID UNK. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION -------------------------------------------------------- 8-10 string hetID Het identifier, right-justified. 13 string ChainID Chain identifier. 14-17 int seqNum Sequence number. 18 string iCode Insertion code. 21-25 int numHetAtoms Number of HETATM records for the 31-70 string text Text describing Het group. """ record = string.strip(line[0:6]) if record == "HET": self.hetID = string.strip(line[7:10]) self.chainID = string.strip(line[12]) try: self.seqNum = int(string.strip(line[13])) except ValueError: self.seqNum = None self.iCode = string.strip(line[17]) self.numHetAtoms = int(string.strip(line[20:25])) self.text = string.strip(line[30:70]) else: raise ValueError, record class MODRES: """ MODRES field The MODRES record provides descriptions of modifications (e.g., chemical or post-translational) to protein and nucleic acid residues. Included are a mapping between residue names given in a PDB entry and standard residues. """ def __init__(self, line): """ Initialize by parsing a line COLUMNS TYPE FIELD DEFINITION --------------------------------------- 8-11 string idCode ID code of this entry. 13-15 string resName Residue name used in this entry. 17 string chainID Chain identifier. 19-22 int seqNum Sequence number. 23 string iCode Insertion code. 25-27 string stdRes Standard residue name. 30-70 string comment Description of the residue modification. 
""" record = string.strip(line[0:6]) if record == "MODRES": string.idCode = string.strip(line[7:11]) string.resName = string.strip(line[12:15]) string.chainID = string.strip(line[16]) string.seqNum = int(string.strip(line[18:22])) string.iCode = string.strip(line[22]) string.stdRes = string.strip(line[24:27]) string.comment = string.strip(line[29:70]) else: raise ValueError, record class SEQRES: """ SEQRES field SEQRES records contain the amino acid or nucleic acid sequence of residues in each chain of the macromolecule that was studied. """ def __init__(self, line): """ Initialize by parsing a line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 9-10 int serNum Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain. 12 string chainID Chain identifier. This may be any single legal character, including a blank which is used if there is only one chain. 14-17 int numRes Number of residues in the chain. This value is repeated on every record. 20-22 string resName Residue name. 24-26 string resName Residue name. 28-30 string resName Residue name. 32-34 string resName Residue name. 36-38 string resName Residue name. 40-42 string resName Residue name. 44-46 string resName Residue name. 48-50 string resName Residue name. 52-54 string resName Residue name. 56-58 string resName Residue name. 60-62 string resName Residue name. 64-66 string resName Residue name. 68-70 string resName Residue name. 
""" record = string.strip(line[0:6]) if record == "SEQRES": self.serNum = int(string.strip(line[8:10])) self.chainID = string.strip(line[11]) self.numRes = int(string.strip(line[13:17])) self.resName = [] self.resName.append(string.strip(line[19:22])) self.resName.append(string.strip(line[23:26])) self.resName.append(string.strip(line[27:30])) self.resName.append(string.strip(line[31:34])) self.resName.append(string.strip(line[35:38])) self.resName.append(string.strip(line[39:42])) self.resName.append(string.strip(line[43:46])) self.resName.append(string.strip(line[47:50])) self.resName.append(string.strip(line[51:54])) self.resName.append(string.strip(line[55:58])) self.resName.append(string.strip(line[59:62])) self.resName.append(string.strip(line[63:66])) self.resName.append(string.strip(line[67:70])) else: raise ValueError, record class SEQADV: """ SEQADV field The SEQADV record identifies conflicts between sequence information in the ATOM records of the PDB entry and the sequence database entry given on DBREF. Please note that these records were designed to identify differences and not errors. No assumption is made as to which database contains the correct data. PDB may include REMARK records in the entry that reflect the depositor's view of which database has the correct sequence. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------------- 8-11 string idCode ID code of this entry. 13-15 string resName Name of the PDB residue in conflict. 17 string chainID PDB chain identifier. 19-22 int seqNum PDB sequence number. 23 string iCode PDB insertion code. 25-28 string database Sequence database name. 30-38 string dbIdCode Sequence database accession number. 40-42 string dbRes Sequence database residue name. 44-48 int dbSeq Sequence database sequence number. 50-70 string conflict Conflict comment. 
""" record = string.strip(line[0:6]) if record == "SEQADV": self.idCode = string.strip(line[7:11]) self.resName = string.strip(line[12:15]) self.chainID = string.strip(line[16]) try: self.seqNum = int(string.strip(line[19:22])) except ValueError: self.seqNum = None self.iCode = string.strip(line[22]) self.database = string.strip(line[24:28]) self.dbIdCode = string.strip(line[29:38]) self.dbRes = string.strip(line[39:42]) self.dbSeq = int(string.strip(line[43:48])) self.conflict = string.strip(line[49:70]) else: raise ValueError, record class DBREF: """ DBREF field The DBREF record provides cross-reference links between PDB sequences and the corresponding database entry or entries. A cross reference to the sequence database is mandatory for each peptide chain with a length greater than ten (10) residues. For nucleic acid entries a DBREF record pointing to the Nucleic Acid Database (NDB) is mandatory when the corresponding entry exists in NDB. """ def __init__(self, line): """ Initialize by parsing a line. COLUMNS TYPE FIELD DEFINITION ------------------------------------------------------ 8-11 string idCode ID code of this entry. 13 string chainID Chain identifier. 15-18 int seqBegin Initial sequence number of the PDB sequence segment. 19 string insertBegin Initial insertion code of the PDB sequence segment. 21-24 int seqEnd Ending sequence number of the PDB sequence segment. 25 string insertEnd Ending insertion code of the PDB sequence segment. 27-32 string database Sequence database name. "PDB" when a corresponding sequence database entry has not been identified. 34-41 string dbAccession Sequence database accession code. For GenBank entries, this is the NCBI gi number. 43-54 string dbIdCode Sequence database identification code. For GenBank entries, this is the accession code. 56-60 int dbseqBegin Initial sequence number of the database seqment. 61 string dbinsBeg Insertion code of initial residue of the segment, if PDB is the reference. 
63-67 int dbseqEnd Ending sequence number of the database segment. 68 string dbinsEnd Insertion code of the ending residue of the segment, if PDB is the reference. """ record = string.strip(line[0:6]) if record == "DBREF": self.idCode = string.strip(line[7:11]) self.chainID = string.strip(line[12]) self.seqBegin = int(string.strip(line[14:18])) self.insertBegin = string.strip(line[18]) self.seqEnd = int(string.strip(line[20:24])) self.insertEnd = string.strip(line[24]) self.database = string.strip(line[26:32]) self.dbAccession = string.strip(line[33:41]) self.dbIdCode = string.strip(line[42:54]) self.dbseqBegin = int(string.strip(line[55:60])) self.dbinsBeg = string.strip(line[60]) self.dbseqEnd = int(string.strip(line[62:67])) try: self.dbinsEnd = string.strip(line[67]) except IndexError: self.dbinsEnd = None else: raise ValueError, record class REMARK: """ REMARK field REMARK records present experimental details, annotations, comments, and information not included in other records. In a number of cases, REMARKs are used to expand the contents of other record types. A new level of structure is being used for some REMARK records. This is expected to facilitate searching and will assist in the conversion to a relational database. 
""" def __init__(self, line): """ Initialize by parsing line """ record = string.strip(line[0:6]) if record == "REMARK": self.remarkNum = int(string.strip(line[7:10])) self.remarkDict = {} remarkText = line[11:70] if self.remarkNum == 1: subfield = string.strip(line[11:20]) if subfield == "REFERENCE": self.remarkDict["refNum"] = int(string.strip(line[21:70])) elif subfield == "AUTH": self.remarkDict["authorList"] = string.strip(line[19:70]) elif subfield == "TITL": self.remarkDict["title"] = string.strip(line[19:70]) elif subfield == "EDIT": self.remarkDict["editorList"] = string.strip(line[19:70]) elif subfield == "REF": self.remarkDict["ref"] = string.strip(line[19:66]) elif subfield == "PUBL": self.remarkDict["pub"] = string.strip(line[19:70]) elif subfield == "REFN": self.remarkDict["refn"] = string.strip(line[19:70]) elif self.remarkNum == 2: restr = string.strip(line[22:27]) try: self.remarkDict["resolution"] = float(restr) except ValueError: self.remarkDict["comment"] = string.strip(line[11:70]) else: self.remarkDict["text"] = string.strip(line[11:70]) class JRNL: """ JRNL field The JRNL record contains the primary literature citation that describes the experiment which resulted in the deposited coordinate set. There is at most one JRNL reference per entry. If there is no primary reference, then there is no JRNL reference. Other references are given in REMARK 1. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------- 13-70 string text See Details on web. """ record = string.strip(line[0:6]) if record == "JRNL": self.text = string.strip(line[12:70]) else: raise ValueError, record class SPRSDE: """ SPRSDE field The SPRSDE records contain a list of the ID codes of entries that were made obsolete by the given coordinate entry and withdrawn from the PDB release set. One entry may replace many. 
It is PDB policy that only the principal investigator of a structure has the authority to withdraw it. """ def __init__(self, line): """ Initialize by parsing line COLUMNS TYPE FIELD DEFINITION ----------------------------------------------- 12-20 string sprsdeDate Date this entry superseded the listed entries. 22-25 string idCode ID code of this entry. 32-35 string sIdCode ID code of a superseded entry. 37-40 string sIdCode ID code of a superseded entry. 42-45 string sIdCode ID code of a superseded entry. 47-50 string sIdCode ID code of a superseded entry. 52-55 string sIdCode ID code of a superseded entry. 57-60 string sIdCode ID code of a superseded entry. 62-65 string sIdCode ID code of a superseded entry. 67-70 string sIdCode ID code of a superseded entry. """ record = string.strip(line[0:6]) if record == "SPRSDE": self.sprsdeDate = string.strip(line[11:20]) self.idCode = string.strip(line[21:25]) self.sIdCodes = [] self.sIdCodes.append(string.strip(line[31:35])) self.sIdCodes.append(string.strip(line[36:40])) self.sIdCodes.append(string.strip(line[41:45])) self.sIdCodes.append(string.strip(line[46:50])) self.sIdCodes.append(string.strip(line[51:55])) self.sIdCodes.append(string.strip(line[56:60])) self.sIdCodes.append(string.strip(line[61:65])) self.sIdCodes.append(string.strip(line[66:70])) else: raise ValueError, record class REVDAT: """ REVDAT field REVDAT records contain a history of the modifications made to an entry since its release. """ def __init__(self, line): """ Initialize by parsing a line. COLUMNS TYPE FIELD DEFINITION ------------------------------------------------------- 8-10 int modNum Modification number. 14-22 string modDate Date of modification (or release for new entries). 24-28 string modId Identifies this particular modification. It links to the archive used internally by PDB. 32 int modType An integer identifying the type of modification. 
class AUTHOR:
    """ AUTHOR field

        The AUTHOR record contains the names of the people responsible for
        the contents of the entry.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line

            COLUMNS  TYPE    FIELD       DEFINITION
            --------------------------------------------------
            11-70    string  authorList  List of the author names,
                                         separated by commas

            Raises ValueError (carrying the record name that was found)
            if line is not an AUTHOR record.
        """
        record = line[0:6].strip()
        if record != "AUTHOR":
            raise ValueError(record)
        self.authorList = line[10:70].strip()


class EXPDTA:
    """ EXPDTA field

        The EXPDTA record identifies the experimental technique used. This
        may refer to the type of radiation and sample, or include the
        spectroscopic or modeling technique. Permitted values include:

        ELECTRON DIFFRACTION
        FIBER DIFFRACTION
        FLUORESCENCE TRANSFER
        NEUTRON DIFFRACTION
        NMR
        THEORETICAL MODEL
        X-RAY DIFFRACTION
    """

    def __init__(self, line):
        """
            Initialize by parsing a line

            COLUMNS  TYPE    FIELD      DEFINITION
            --------------------------------------------------
            11-70    string  technique  The experimental technique(s) with
                                        optional comment describing the
                                        sample or experiment

            Raises ValueError (carrying the record name that was found)
            if line is not an EXPDTA record.
        """
        record = line[0:6].strip()
        if record != "EXPDTA":
            raise ValueError(record)
        self.technique = line[10:70].strip()


class KEYWDS:
    """ KEYWDS field

        The KEYWDS record contains a set of terms relevant to the entry.
        Terms in the KEYWDS record provide a simple means of categorizing
        entries and may be used to generate index files. This record
        addresses some of the limitations found in the classification field
        of the HEADER record. It provides the opportunity to add further
        annotation to the entry in a concise and computer-searchable fashion.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line

            COLUMNS  TYPE    FIELD   DEFINITION
            --------------------------------------------------
            11-70    string  keywds  Comma-separated list of keywords
                                     relevant to the entry

            Raises ValueError (carrying the record name that was found)
            if line is not a KEYWDS record.
        """
        record = line[0:6].strip()
        if record != "KEYWDS":
            raise ValueError(record)
        self.keywds = line[10:70].strip()


class SOURCE:
    """ SOURCE field

        The SOURCE record specifies the biological and/or chemical source of
        each biological molecule in the entry. Sources are described by both
        the common name and the scientific name, e.g., genus and species.
        Strain and/or cell-line for immortalized cells are given when they
        help to uniquely identify the biological entity studied.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line

            COLUMNS  TYPE    FIELD   DEFINITION
            --------------------------------------------------
            11-70    string  source  Identifies the source of the
                                     macromolecule in a token: value format

            Raises ValueError (carrying the record name that was found)
            if line is not a SOURCE record.
        """
        record = line[0:6].strip()
        if record != "SOURCE":
            raise ValueError(record)
        self.source = line[10:70].strip()


class COMPND:
    """ COMPND field

        The COMPND record describes the macromolecular contents of an entry.
        Each macromolecule found in the entry is described by a set of
        token: value pairs, and is referred to as a COMPND record component.
        Since the concept of a molecule is difficult to specify exactly, PDB
        staff may exercise editorial judgment in consultation with depositors
        in assigning these names.

        For each macromolecular component, the molecule name, synonyms,
        number assigned by the Enzyme Commission (EC), and other relevant
        details are specified.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line

            COLUMNS  TYPE    FIELD     DEFINITION
            --------------------------------------------------
            11-70    string  compound  Description of the molecular list
                                       components.

            Raises ValueError (carrying the record name that was found)
            if line is not a COMPND record.
        """
        record = line[0:6].strip()
        if record != "COMPND":
            raise ValueError(record)
        self.compound = line[10:70].strip()


class CAVEAT:
    """ CAVEAT field

        CAVEAT warns of severe errors in an entry. Use caution when using an
        entry containing this record.
    """

    def __init__(self, line):
        """
            Initialize by parsing line.

            COLUMNS  TYPE    FIELD    DEFINITION
            ----------------------------------------------------
            12-15    string  idCode   PDB ID code of this entry.
            20-70    string  comment  Free text giving the reason for the
                                      CAVEAT.

            Raises ValueError (carrying the record name that was found)
            if line is not a CAVEAT record.
        """
        record = line[0:6].strip()
        if record != "CAVEAT":
            raise ValueError(record)
        self.idCode = line[11:15].strip()
        self.comment = line[19:70].strip()


class TITLE:
    """ TITLE field

        The TITLE record contains a title for the experiment or analysis
        that is represented in the entry. It should identify an entry in the
        PDB in the same way that a title identifies a paper.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line.

            COLUMNS  TYPE    FIELD  DEFINITION
            ---------------------------------------------
            11-70    string  title  Title of the experiment

            Raises ValueError (carrying the record name that was found)
            if line is not a TITLE record.
        """
        record = line[0:6].strip()
        if record != "TITLE":
            raise ValueError(record)
        self.title = line[10:70].strip()


class OBSLTE:
    """ OBSLTE field

        This record acts as a flag in an entry which has been withdrawn from
        the PDB's full release. It indicates which, if any, new entries have
        replaced the withdrawn entry.

        The format allows for the case of multiple new entries replacing one
        existing entry.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line.

            COLUMNS  TYPE    FIELD    DEFINITION
            -----------------------------------------------
            12-20    string  repDate  Date that this entry was replaced.
            22-25    string  idCode   ID code of this entry.
            32-35    string  rIdCode  ID code of entry that replaced
                                      this one.
            37-40    string  rIdCode  ID code of entry that replaced
                                      this one.
            42-45    string  rIdCode  ID code of entry that replaced
                                      this one.
            47-50    string  rIdCode  ID code of entry that replaced
                                      this one.
            52-55    string  rIdCode  ID code of entry that replaced
                                      this one.
            57-60    string  rIdCode  ID code of entry that replaced
                                      this one.
            62-65    string  rIdCode  ID code of entry that replaced
                                      this one.
            67-70    string  rIdCode  ID code of entry that replaced
                                      this one.

            The eight rIdCode fields are stored, in column order, in the
            list self.rIdCodes; unused fields are kept as empty strings.

            Raises ValueError (carrying the record name that was found)
            if line is not an OBSLTE record.
        """
        record = line[0:6].strip()
        if record != "OBSLTE":
            raise ValueError(record)
        self.repDate = line[11:20].strip()
        self.idCode = line[21:25].strip()
        # Eight 4-character ID fields, one every 5 columns, starting at
        # column 32 (index 31).  The last one is columns 67-70, i.e. the
        # slice [66:70] -- it was previously read as [67:70], which dropped
        # the first character of the eighth replacement ID.
        self.rIdCodes = [line[start:start + 4].strip()
                         for start in range(31, 71, 5)]


class HEADER:
    """ HEADER field

        The HEADER record uniquely identifies a PDB entry through the idCode
        field. This record also provides a classification for the entry.
        Finally, it contains the date the coordinates were deposited at the
        PDB.
    """

    def __init__(self, line):
        """
            Initialize by parsing a line.

            COLUMNS  TYPE    FIELD           DEFINITION
            ---------------------------------------------------------
            11-50    string  classification  Classifies the molecule(s)
            51-59    string  depDate         Deposition date. This is the
                                             date the coordinates were
                                             received by the PDB
            63-66    string  idCode          This identifier is unique
                                             within PDB

            Note that the idCode field is stored in the attribute named
            IDcode (not idCode).

            Raises ValueError (carrying the record name that was found)
            if line is not a HEADER record.
        """
        record = line[0:6].strip()
        if record != "HEADER":
            raise ValueError(record)
        self.classification = line[10:50].strip()
        self.depDate = line[50:59].strip()
        self.IDcode = line[62:66].strip()


def _record_class(record):
    """
        Return the class of this module whose name is record, or None.

        Only classes are returned, so that text read from a PDB file can
        never select a function, a module or a builtin.
    """
    import inspect
    candidate = globals().get(record)
    if inspect.isclass(candidate):
        return candidate
    return None


def readAtom(line):
    """
        If the ATOM/HETATM is not column-formatted, try to get some
        information by parsing whitespace from the right.  Look for
        five floating point numbers followed by the residue number.

        The line is rebuilt in fixed columns and handed to the record class
        named by the first six columns of line:

            columns  1-22   copied unchanged from line
            columns 23-26   residue number (word left of the five floats)
            columns 27-30   blank
            columns 31-54   first three floats, 8 columns each
            columns 55-66   last two floats, 6 columns each

        Parameters
            line:  The line to parse (string)
        Returns
            the object built by the record class from the rebuilt line
        Raises
            ValueError if no five consecutive numeric words are found, or
            if the record name does not name a class of this module.
    """
    words = line.split()

    # Scan from the right for five consecutive words that parse as floats.
    # The first word of the line is never examined.
    first = None
    run = 0
    for idx in range(len(words) - 1, 0, -1):
        try:
            float(words[idx])
        except ValueError:
            run = 0
            continue
        run += 1
        if run == 5:
            first = idx
            break
    if first is None:
        # Previously the rebuild went ahead with whatever words happened to
        # be at the front of the line; fail explicitly instead.
        raise ValueError("no five consecutive numeric fields in: %s"
                         % line.strip())

    record = line[0:6].strip()
    # NOTE: this used to be eval("%s(newline)" % record).  The record name
    # comes from the input line, so it is resolved through a class-only
    # lookup instead of being evaluated.
    parser = _record_class(record)
    if parser is None:
        raise ValueError(record)

    resseq = words[first - 1]
    xcoord, ycoord, zcoord, occupancy, tempfactor = words[first:first + 5]
    newline = "".join([
        line[0:22],
        resseq.rjust(4),
        " " * 3,
        xcoord.rjust(8),
        ycoord.rjust(8),
        zcoord.rjust(8),
        occupancy.rjust(6),
        tempfactor.rjust(6),
    ])
    return parser(newline)


def readPDB(file):
    """
        Parse PDB-format data into array of Atom objects.

        Each non-blank line is stripped and dispatched to the class of this
        module named by its first six columns.  Blank lines are skipped;
        reading stops only at end of file.

        Parameters
            file:  open file object (only readline() is used)
        Returns (pdblist, errlist)
            pdblist:  a list of objects, one per successfully parsed line
            errlist:  a list of record names that couldn't be parsed
                      (each name appears once; further lines with that
                      record name are skipped)
    """
    pdblist = []  # Array of parsed lines (as objects)
    errlist = []  # List of records we can't parse

    while True:
        rawline = file.readline()
        if not rawline:
            # readline() returns an empty string only at end of file
            break
        line = rawline.strip()
        if not line:
            # A blank line used to be mistaken for end of file, silently
            # dropping the rest of the input.
            continue

        record = line[0:6].strip()
        if record in errlist:
            continue

        # NOTE: this used to be eval("%s(line)" % record) on text taken
        # straight from the file, which could invoke any callable with a
        # short enough name.  Only classes defined in this module are
        # accepted now; anything else is reported through errlist.
        parser = _record_class(record)
        if parser is None:
            errlist.append(record)
            continue

        try:
            pdblist.append(parser(line))
        except NameError:
            # Kept from the original handling: a NameError while building
            # the object marks the record type as unparseable.
            errlist.append(record)
        except Exception as details:
            if record in ("ATOM", "HETATM"):
                # Columns may be misaligned; retry with the
                # whitespace-based fallback.
                try:
                    pdblist.append(readAtom(line))
                except Exception as fallback_details:
                    sys.stderr.write("Error parsing line: %s\n"
                                     % fallback_details)
                    sys.stderr.write("<%s>\n" % line.strip())
            elif record in ("SITE", "TURN"):
                # Failures in these records are ignored silently
                pass
            elif record in ("SSBOND", "LINK"):
                sys.stderr.write("Warning -- ignoring record: \n")
                sys.stderr.write("<%s>\n" % line.strip())
            else:
                sys.stderr.write("Error parsing line: %s\n" % details)
                sys.stderr.write("<%s>\n" % line.strip())

    return pdblist, errlist


def getRandom():
    """
        Download a random PDB and return the path name.

        Runs the external programs ncftpls, ncftpget and uncompress, which
        must be installed, and needs network access.

        Returns
            path name of downloaded file
    """
    import random
    import subprocess

    URL = "ftp://ftp.rcsb.org/pub/pdb/data/structures/all/pdb/"

    # Commands are run from argument lists rather than shell strings, so a
    # file name coming from the remote listing is never interpreted by a
    # shell, and each step is waited for before the next one starts.
    lister = subprocess.Popen(["ncftpls", URL], stdout=subprocess.PIPE,
                              universal_newlines=True)
    listing = lister.communicate()[0]

    # Drop "@" characters from the listing (presumably link markers --
    # TODO confirm) and split it into file names.
    pdblist = listing.replace("@", "").split()
    pdbZ = random.choice(pdblist)

    subprocess.call(["ncftpget", "%s/%s" % (URL, pdbZ)])
    subprocess.call(["uncompress", pdbZ])

    # Strip the two-character compression suffix from the name
    return pdbZ[:-2]


def main():
    """
        Main driver for testing.  Parses set number of random PDBs
    """
    npdb = 1
    sys.stdout.write("Testing %d PDBs...\n" % npdb)
    for _ in range(npdb):
        sys.stdout.write("Getting random PDB...\n")
        path = getRandom()
        sys.stdout.write("Parsing %s...\n" % path)
        # "rU" is the Python 2 universal-newline read mode
        with open(path, "rU") as handle:
            _pdblist, errlist = readPDB(handle)
        if errlist:
            sys.stdout.write("\tSkipped records: %s\n" % errlist)
        else:
            # Previously printed even when records had been skipped
            sys.stdout.write("\tNo skipped records.\n")


if __name__ == "__main__":
    main()