#[SPARKY EXTENSION :: BMRB submission form creation ]=========================== # #AUTHOR #====== # > Nicolas Sapay, Ph.D. student at IBCP # Insitut de Biologie et Chimie des Proteines # 7 passage du Vercors # 69367 Lyon cedex 07,France # # > Any Bugs or comments can be reported at n.sapay@ibcp.fr #SYNOPSIS #======== # This Python script is a SPARKY EXTENSION that provides methods to create a # BMRB submission form directly from a SPARKY resonance list (Only for amino # acids). Methods defined here include : # - methods to detect possible atom nomenclature mistakes # - methods to detect redundant assignation in the resonance list # - methods to detect strong chemical shift Standard Deviation (SD) # # A NMR STAR template file is required to run the extension. The template file # can be generated on the BMRB web site: # www.bmrb.wisc.edu/elec_dep/gen_aa.html #[ PYTHON MODULE IMPORTATION ]================================================== import sys import re import pyutil import sputil import tkutil import tkFileDialog from Tkinter import * from UserDict import UserDict #=[ ALPHABETS ]================================================================= one2three_letter_code = { 'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU', 'F': 'PHE', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'K': 'LYS', 'L': 'LEU', 'M': 'MET', 'N': 'ASN', 'P': 'PRO', 'Q': 'GLN', 'R': 'ARG', 'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR', 'X': 'UNK'} three2one_letter_code = { 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R', 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y', 'UNK': 'X'} #=[ AMBIGUITY RULES BASED ON IUPAC NOMENCLATURE & BMRB DEFINITIONS ]============ ambiguity2_CH = {'HA2' :('G'), 'HA3' :('G'), 'HB1' :('A'), 'HB2' :('A','P','R','N','D','C','Q','E','H','L','K','M','F','S','W','Y'), 'HB3' :('A','P','R','N','D','C','Q','E','H','L','K','M','F','S','W','Y'), 'HG2' :('P','R','Q','E','K','M'), 'HG3' :('P','R','Q','E','K','M'), 'HG11':('V') , 'HG12':('V','I') , 'HG13':('V','I'), 'HG21':('I','T','V') , 'HG22':('I','T','V'), 'HG23':('I','T','V'), 'HD2' :('P','R','K') , 'HD3' :('P','R','K'), 'HD11':('I','L') , 'HD12':('I','L') , 'HD13':('I','L'), 'HD21':('L') , 'HD22':('L') , 'HD23':('L'), 'HE1' :('M') , 'HE2' :('K','M') , 'HE3' :('K','M'), 'CG1' :('V') , 'CG2' :('V') , 'CD1' :('L') , 'CD2' :('L')} ambiguity2_ONH = {'NH1':'R', 'NH2':'R', 'HZ1' :'K', 'HZ2' :'K', 'HZ3' :'K', 'HD21':'N', 'HD22':'N', 'HE21':'Q', 'HE22':'Q'} ambiguity3 = {'HD1':('F','Y'), 'HD2':('F','Y'), 'CD1':('F'), 'CD2':('F','Y'), 'HE1':('F','Y'), 'HE2':('F','Y'), 'CE1':('F'), 'CE2':('F','Y')} ambiguity4 = {'HH11':'R', 'HH12':'R', 'HH21':'R', 'HH22':'R'} #=[ METAWIDGET CLASSES ]======================================================== class Radiobar(Frame): #======================================================== """ A radio button bar """ def __init__(self, parent=NONE, picks=[], bgcolor=None): #__________________ Frame.__init__(self, parent) self.var = StringVar() i = 0 for pick in picks : rad = Radiobutton(self, text=pick[0], value=pick[1], variable=self.var, highlightthickness=0, bg=bgcolor)#padx=15, bg=bgcolor) if i == 0 : rad.select() rad.grid(row= 0, column=i, sticky=E+W) i += 1 class AutoScrollBar(Scrollbar): #=============================================== """ A scrollbar that hides itself if it's not needed. Only works with the grid geometry manager.""" def set(self, lo, hi):#_____________________________________________________ if float(lo) <= 0.0 and float(hi) >= 1.0: # grid_remove is currently missing from Tkinter! self.tk.call("grid", "remove", self) else: self.grid() Scrollbar.set(self, lo, hi) def pack(self, **kw): #_____________________________________________________ raise TclError, "cannot use pack with this widget" def place(self, **kw): #____________________________________________________ raise TclError, "cannot use place with this widget" class AutoScrollCanvas(Frame): #================================================ """ An autoscrollable canvas from http://effbot.org/zone/tkinter-autoscrollbar.htm""" def __init__(self, parent=None, shiftdict=None, lexic=None, condition=None, threshold=None, state=None, bgcolor="white"): #build the listing which has to be shown if not condition : listing = [] elif condition == 'redundancy' : listing = self.redundant_list(shiftdict) elif condition == 'deviation' : listing = self.deviant_list(shiftdict, threshold) elif condition == 'nomenclature' : listing = self.nomenclature_list(shiftdict, lexic) elif condition == 'state' : listing = self.state_list(shiftdict, state) #scrollable canvas vscroll = AutoScrollBar(parent) vscroll.grid(row=1, column=1, sticky=N+S) hscroll = AutoScrollBar(parent, orient=HORIZONTAL) hscroll.grid(row=2, column=0, sticky=E+W) self.canvas = Canvas(parent, yscrollcommand=vscroll.set, xscrollcommand=hscroll.set) self.canvas.grid(row=1, column=0, sticky=N+E+W+S, ipadx=30)#, pady=5) vscroll.config(command=self.canvas.yview) hscroll.config(command=self.canvas.xview) parent.grid_rowconfigure(1, weight=1) parent.grid_columnconfigure(0, weight=1) #a frame associated to the canvas Frame.__init__(self, self.canvas) self.rowconfigure(2, weight=1) self.columnconfigure(1, weight=1) #show the listing self.selected = [0] * len(listing) for i in range(len(listing)) : header = Label(self, text=listing[i][0], font='fixed') if i%2 == 0 : header.configure(bg=bgcolor) header.grid(row=i, column=0, sticky=E) if i%2 == 0 : self.selected[i] = Radiobar(self, listing[i][1], bgcolor) else : self.selected[i] = Radiobar(self, listing[i][1]) self.selected[i].grid(row=i, column=1,sticky=W) self.canvas.create_window(1, 0, anchor=NW, window=self) self.update_idletasks() self.canvas.config(scrollregion=self.canvas.bbox("all")) def get(self, a_string='|'): #______________________________________________ """get selected value in the canvas""" selected_value = [0] * len(self.selected) for i in range(len(self.selected)) : selected_value[i] = self.selected[i].var.get().split(a_string) return selected_value def redundant_list(self, shiftdict): #______________________________________ """build the list of redundant assignations""" listing = [] for shift in shiftdict.duplicatedAtom() : header = "%1s%-4d::%-5s"%(shift.res, shift.rank, shift.atom) picks = [] for i in range(len(shift.redundancy)): text = "%6.2f ppm (assign %3dx)" % (shift.redundancy[i][0], shift.redundancy[i][2]) value = "%d|%s|%f|%f|%d"%(shift.rank, shift.atom, shift.redundancy[i][0], shift.redundancy[i][1], shift.redundancy[i][2]) picks.append([text, value]) listing.append([header, picks]) return listing def deviant_list(self, shiftdict, threshold): #_____________________________ """build the list of SD chemical shifts upper than threshold""" listing = [] for shift in shiftdict.upperThanThreshold(threshold) : header = "%1s%-4d::%-5s = %7.3f +/-%6.3f ppm "%(shift.res, shift.rank, shift.atom, shift.shift, shift.sd) text1 = "add" value1 = "%d|%s|%f|%f|%d|%s"%(shift.rank, shift.atom, shift.shift, shift.sd, shift.assign, 'a') text2 = "remove" value2 = "%d|%s|%s"%(shift.rank, shift.atom, 'r') picks = [[text1,value1], [text2,value2]] listing.append([header, picks]) return listing def nomenclature_list(self, shiftdict, lexic): #____________________________ """build the list of nomenclature mistakes""" listing = [] for shift in shiftdict.checkNomenclature(lexic) : header = "%1s%-4d::%-5s = %7.3f ppm "%(shift.res, shift.rank, shift.atom, shift.shift) text1 = "add" value1 = "%d|%s|%f|%f|%d|%s"%(shift.rank, shift.atom, shift.shift, shift.sd, shift.assign, 'a') text2 = "remove" value2 = "%d|%s|%s"%(shift.rank, shift.atom, 'r') picks = [[text1,value1], [text2,value2]] listing.append([header, picks]) return listing def state_list(self, shiftdict, state): #___________________________________ """build the list of non-retieved/added/found chemical shifts""" listing = [] for rank in shiftdict.keys() : for shift in shiftdict[rank].values() : if shift.state == state and state == 'added': header = "%1s%-4d::%-5s = @ ppm "%(shift.res, shift.rank, shift.atom) text1 = "add" value1 = "%d|%s|%f|%f|%d|%s"%(shift.rank, shift.atom, shift.shift, shift.sd, shift.assign, shift.state) text2 = "don't add" value2 = "%d|%s|%s"%(shift.rank, shift.atom, 'r') picks = [[text2,value2], [text1,value1]] listing.append([header, picks]) elif shift.state == state and state == 'not found': header = "%1s%-4d::%-5s = %7.3f ppm "%(shift.res, shift.rank, shift.atom, shift.shift) text1 = "add" value1 = "%d|%s|%f|%f|%d|%s"%(shift.rank, shift.atom, shift.shift, shift.sd, shift.assign, shift.state) text2 = "don't add" value2 = "%d|%s|%s"%(shift.rank, shift.atom, 'r') picks = [[text2,value2], [text1,value1]] listing.append([header, picks]) return listing class SetUpFrame(Frame): #====================================================== """ main setup frame""" def __init__(self, session=None, parent=None): #____________________________ Frame.__init__(self, parent, relief=GROOVE, bd=2)#, padx=5, pady=5) #Basic config self.session = session self.parent = parent self.resonance_list = None self.userseq = None self.userdict = None self.format = "XPLOR" self.star_file = None self.threshold = {'C': 0.1, 'H': 0.005, 'N': 0.01, 'O': 0.01, 'P': 0.01, 'S': 0.01} #Spectrum choice self.condition_choice = sputil.condition_menu(self.session, self, 'a) Conditions: ') self.condition_choice.frame.grid(row=1, column=0, sticky=W) #Load shifts in a dictionary try : self.resonance_list = self.condition_choice.condition().resonance_list() self.userseq, self.userdict = ResonanceList2ShiftDict(resonance_list) except : self.userseq, self.userdict = ResonanceList2ShiftDict() #Threshold choice Label(self, text='\nb) SD chemical shift threshold (in ppm)').grid(row=2, column=0, sticky=W) self.threshold_choice = tkutil.entry_row(self, '', (' 13C =' , self.threshold.get('C', 0.100), 10), (' 1H =' , self.threshold.get('H', 0.005), 10), (' 15N =' , self.threshold.get('N', 0.010), 10)) self.threshold_choice.frame.grid(row=3, column=0, sticky=S) #Nomenclature choice Label(self, text='\nc) Atom assignment nomenclature').grid(row=4, column=0, sticky=W) self.nomcl_choice = Radiobar(self, [["XPLOR", "XPLOR"],["IUPAC", "IUPAC"],["UCSF", "UCSF"],["BMRB","BMRB"]]) self.nomcl_choice.grid(row=5, column=0, sticky=S) self.lexic = UserLex(self.nomcl_choice.var.get()) #BMRB template file Label(self, text='\nd) Use this BMRB template file (in STAR format)').grid(row=6, column=0, sticky=W) Label(self, text=' - template file genrerator : www.bmrb.wisc.edu/elec_dep/gen_aa.html', fg="darkred").grid(row=7, column=0, sticky=W) self.starfile_choice = tkutil.file_field(self," - template file", "*.star", width=30) #self.starfile_choice.set('/home/nsapay-tatra/Sparky/Project/S26A_TFE/TOCSY_attribution/s26a_template.star') self.starfile_choice.frame.grid(row=8, column=0, sticky=W) #=[ DIALOG CLASSES ]============================================================ #=============================================================================== class sparky2bmrb_Dialog(tkutil.Dialog): #====================================== """ Main window of the "BMRB submission form builder" extension""" def __init__(self, session): #______________________________________________ #Load Dialog class ----------------------------------------------------- tkutil.Dialog.__init__(self, session.tk, 'BMRB submission form creation (only for amino acids)') self.session = session #The Main Setup Frame -------------------------------------------------- self.setup = SetUpFrame(session, self.top) self.setup.grid(row=0, column=0, sticky=N+E+W+S) #The "Redundancy" Frame ------------------------------------------------ self.redundancy = Frame(self.top, relief=GROOVE, bd=2)#, padx=5, pady=5) Label(self.redundancy, text='Redundant Chemical Shift Assignation',bg="lightgreen").grid(row=0, column=0, sticky=N+E+W+S) self.redundancy.canvas = AutoScrollCanvas(parent = self.redundancy, shiftdict = self.setup.userdict, condition = 'redundancy', bgcolor = "lightgreen") self.redundancy.grid(row=1, column=0, sticky=N+E+W+S) #The "Remove Deviation" Frame ------------------------------------------ self.deviation = Frame(self.top, relief=GROOVE, bd=2)#, padx=5, pady=5) Label(self.deviation, text='Chemical Shift Deviation > Treshold',bg="lightgrey").grid(row=0, column=0, sticky=N+E+W+S) self.deviation.canvas = AutoScrollCanvas(parent = self.deviation, shiftdict = self.setup.userdict, condition = 'deviation', threshold = self.setup.threshold, bgcolor = "lightgrey") self.deviation.grid(row=0, column=1, sticky=N+E+W+S) #The "Check nomenclature" Frame ---------------------------------------- self.nomenclature = Frame(self.top, relief=GROOVE, bd=2)#, padx=5, pady=5) Label(self.nomenclature, text='Nomenclature Mistakes',bg="lightblue").grid(row=0, column=0, sticky=N+E+W+S) self.nomenclature.canvas = AutoScrollCanvas(parent = self.nomenclature, shiftdict = self.setup.userdict, condition = 'nomenclature', lexic = self.setup.lexic, bgcolor = "lightblue") self.nomenclature.grid(row=1, column=1, sticky=N+E+W+S) #Buttons --------------------------------------------------------------- bRow = tkutil.button_row(self.top, ('Setup', self.setup_cb), ('Close', self.close_cb), ('Create', self.create_cb)) bRow.frame.grid(row=2, column=0, columnspan=2, pady=10) def setup_cb(self): #_______________________________________________________ """Setup all""" #update resonance list resonance_list = [] resonance_list = self.setup.condition_choice.condition().resonance_list() self.setup.userseq, self.setup.userdict = ResonanceList2ShiftDict(resonance_list) #update thresholds self.setup.threshold['C'] = float(self.setup.threshold_choice.variables[0].get()) self.setup.threshold['H'] = float(self.setup.threshold_choice.variables[1].get()) self.setup.threshold['N'] = float(self.setup.threshold_choice.variables[2].get()) #update nomenclature self.setup.lexic = UserLex(self.setup.nomcl_choice.var.get()) self.setup.star_file = self.setup.starfile_choice.get() #update redundant chemical shifts self.redundancy.canvas.canvas.delete("all") self.redundancy.canvas = AutoScrollCanvas(parent = self.redundancy, shiftdict = self.setup.userdict, condition = 'redundancy', bgcolor = "lightgreen") #update large SD chemical shifts self.deviation.canvas.canvas.delete("all") self.deviation.canvas = AutoScrollCanvas(parent = self.deviation, shiftdict = self.setup.userdict, condition = 'deviation', threshold = self.setup.threshold, bgcolor = "lightgrey") #update nomenclature mistakes self.nomenclature.canvas.canvas.delete("all") self.nomenclature.canvas = AutoScrollCanvas(parent = self.nomenclature, shiftdict = self.setup.userdict, condition = 'nomenclature', lexic = self.setup.lexic, bgcolor = "lightblue") def create_cb(self): #______________________________________________________ """Create BMRB submission form""" if self.setup.condition_choice.condition() == None : return 1 elif self.setup.starfile_choice.get() == None : return 1 resonance_list = self.setup.condition_choice.condition().resonance_list() self.setup.userseq, self.setup.userdict = ResonanceList2ShiftDict(resonance_list) #remove atoms and fix redundancies self.setup.userdict.fixRedundant(self.redundancy.canvas.get()) self.setup.userdict.remove(self.deviation.canvas.get()) self.setup.userdict.remove(self.nomenclature.canvas.get()) #load template file in a dictionary self.stardict, self.starseq = read_STAR(self.setup.starfile_choice.get()) sys.stderr.write("Read sequences :\n\t- Sparky resonance list\n%s\n\n\t- Star template file\n%s\n"% (self.setup.userseq, self.starseq)) #convert atom names to IUPAC nomenclature for rank in self.setup.userdict.keys(): for atom in self.setup.userdict[rank].keys(): if self.starseq[rank-1] and self.setup.userdict[rank][atom].res != self.starseq[rank-1] : sys.stderr.write(" Residue %d :: name conflict ... %s (star file) vs %s (resonance list)\n"% (rank, self.starseq[rank-1], self.setup.userdict[rank][atom].res)) self.setup.userdict[rank][atom].res = self.starseq[rank-1] new_atom = None new_atom = self.setup.userdict.convert2IUPAC(self.setup.userdict[rank][atom], self.setup.nomcl_choice.var.get()) if new_atom and new_atom != atom : self.setup.userdict[rank][atom].atom = new_atom #self.setup.userdict.setResidue(rank, new_atom, # self.setup.userdict[rank][atom].res, # self.setup.userdict[rank][atom].shift, # self.setup.userdict[rank][atom].sd, # self.setup.userdict[rank][atom].assign) #del self.setup.userdict[rank][atom] #retrieve atoms in star dictionary and complete the user dictionary for rank in range(1, len(self.starseq)+1): if self.setup.userdict.has_key(rank): assign_ambiguity(self.setup.userdict[rank]) self.setup.userdict[rank].expand_pseudoatom(self.setup.lexic) self.setup.userdict[rank].convert2bmrb(self.setup.lexic, self.setup.nomcl_choice.var.get()) atom_retrieval(self.setup.userdict[rank], self.stardict[rank]) #Open the window that summary the submission form creation creation = sputil.the_dialog(creation_Dialog, self.session) creation.show_everything(self.starseq, self.setup.userdict, self.setup.starfile_choice.get(), self.setup.condition_choice.condition(), self.session.project) creation.show_window(1) #=============================================================================== class creation_Dialog(tkutil.Settings_Dialog): #================================ """ Dialog window to select redundant shifts to be removed""" def __init__(self, session): #============================================== self.session = session tkutil.Settings_Dialog.__init__(self, session.tk, 'BMRM Submission Form : retrived chemical shift') def show_everything(self, sequence=None , userdict=None, starfile=None, condition=None, project=None): self.sequence = sequence self.userdict = userdict self.template_file = starfile self.condition = condition self.project = project #Show atoms defined in the template file and not retrived in the user resonances list self.added = Frame(self.top, relief=GROOVE, bd=2)#, padx=5, pady=5) longtext = 'Atoms defined in the template file and not retrived in your resonances list' Label(self.added, text=longtext, wraplength=300, bg="lightblue").grid(row=0, column=0, sticky=N+E+W+S) self.added.canvas = AutoScrollCanvas(parent = self.added, shiftdict = userdict, condition = 'state', state = 'added', bgcolor = 'lightblue') self.added.grid(row=0, column=0, sticky=N+E+W+S) #Show atoms defined in the user resonances list and not retrived in the template file self.not_retrieved = Frame(self.top, relief=GROOVE, bd=2)#, padx=5, pady=5) longtext = 'Atoms defined in your resonances list and not retrived in the template file' Label(self.not_retrieved, text=longtext, wraplength=300, bg="lightgreen").grid(row=0, column=0, sticky=N+E+W+S) self.not_retrieved.canvas = AutoScrollCanvas(parent = self.not_retrieved, shiftdict = userdict, condition = 'state', state = 'not found', bgcolor = 'lightgreen') self.not_retrieved.grid(row=0, column=1, sticky=N+E+W+S) #Choose an output file self.outputfile = starfile.split('.')[0] + '.bmrb' #Close Button(self.top, text='Save', command=self.save_file_cb).grid(row=1, column=0, sticky=E) Button(self.top, text='Close', command=self.close_cb).grid(row=1, column=1, sticky=W) def save_file_cb(self): #=================================================== """Save the BMRB submission form in a file""" self.outputfile = tkFileDialog.asksaveasfilename(initialfile = self.outputfile, title="Save your BMRB submission form", filetypes = [("Text files","*.txt"), ("NMR STAR files","*.star"), ("BMRB files","*.bmrb"), ("All files","*")]) self.userdict.remove(self.added.canvas.get()) self.userdict.remove(self.not_retrieved.canvas.get()) idbmrb = 1 header, footer = split_STAR(self.project, self.condition, self.template_file) try : bmrb = open(self.outputfile, 'w') except : sys.stderr.write("\nCan't open BMRB submission form ... check %s\n"%self.outputfile) return 1 for line in header : bmrb.write(line) for rank in range(1, len(self.sequence)+1): for atom in self.userdict[rank].keys(): for nuc in ('H', 'C', 'N', 'O', 'S'): for pos in ('', 'A', 'B', 'G', 'D', 'E', 'Z', 'H'): for num in ('', '1', '2', '3', '11', '12', '13', '21', '22', '23'): if self.userdict[rank][atom].atom == nuc+pos+num and self.userdict[rank][atom].state != 'added': values = ("%.2f"%self.userdict[rank][atom].shift).ljust(8)+("%.2f"%self.userdict[rank][atom].sd).ljust(8) bmrb.write("%-9d%-7d%-8s%-8s%-8s%s%-d\n"% (idbmrb, rank, one2three_letter_code[self.userdict[rank][atom].res], self.userdict[rank][atom].atom, self.userdict[rank][atom].nuc, values, self.userdict[rank][atom].ambiguity)) idbmrb += 1 elif self.userdict[rank][atom].atom == nuc+pos+num and self.userdict[rank][atom].state == 'added': bmrb.write("%-9d%-7d%-8s%-8s%-8s%-8s%-8s%-s #Not retrieved !\n"% (idbmrb, rank, one2three_letter_code[self.userdict[rank][atom].res], self.userdict[rank][atom].atom, self.userdict[rank][atom].nuc, '@', '@','@')) idbmrb += 1 for line in footer : bmrb.write(line) bmrb.close() #=[ ATOM NOMENCLATURE & CHEMICAL SHIFT CLASSES ]================================ #=============================================================================== class UserLex(UserDict): #====================================================== """Dictionaries for format conversion""" def __init__(self, format): #_______________________________________________ UserDict.__init__(self) self.alphabet = ('A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X') if format == "XPLOR": self.backbone = {'HT1':self.alphabet, 'HT2':self.alphabet, 'HT3':self.alphabet, 'OT1':self.alphabet, 'OT2':self.alphabet, 'HN':self.alphabet , 'C':self.alphabet , 'CA':self.alphabet, 'N':self.alphabet, 'O':self.alphabet, 'HA':('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HA1':('G','X'), 'HA2':('G','X'), 'HA#':('G','X')} self.sidechain = { 'CB' :('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HB' :('I','T','V','X'),'HB1':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HB2':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'),'HB3':('A','X'), 'HB#':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'CG' :('D','E','F','H','K','L','M','N','P','Q','R','W','Y','X'), 'CG1' :('I','V','X'),'CG2' :('I','T','V','X'),'CG*' :('V','X'), 'HG' :('C','L','S','X'),'HG1' :('E','K','M','P','Q','R','T','X'),'HG2' :('E','K','M','P','Q','R','X'), 'HG11':('I','V','X'),'HG12':('I','V','X'),'HG13':('V','X'), 'HG21':('I','T','V','X'),'HG22':('I','T','V','X'),'HG23':('I','T','V','X'), 'HG#' :('E','K','M','P','Q','R','X'),'HG1#':('I','V','X'),'HG2#':('I','T','V','X'),'HG*' :('V','X'), 'CD' :('E','K','P','Q','R','X'),'CD1' :('F','I','L','W','Y','X'),'CD2' :('F','H','L','W','Y','X'), 'CD*' :('Y','F','L','X'), 'ND1' :('H','X'),'ND2' :('N','X'), 'HD1' :('F','H','K','P','R','W','Y','X'),'HD2' :('F','H','K','P','R','Y','X'), 'HD11':('I','L','X'),'HD12':('I','L','X'),'HD13':('I','L','X'), 'HD21':('N','L','X'),'HD22':('N','L','X'),'HD23':('L','X'),'HD#' :('P','R','K','X'), 'HD1#':('I','L','X'),'HD2#':('N','L','X'),'HD*' :('Y','F','L','X'), 'CE' :('K','M','X'),'CE1' :('F','Y','H','X'),'CE2' :('F','W','Y','X'),'CE3' :('W','X'),'CE*' :('F','Y','X'), 'NE' :('R','X'),'NE1' :('W','X'),'NE2' :('Q','H','X'), 'HE' :('R','X'),'HE1' :('H','K','M','F','W','Y','X'),'HE2' :('H','K','M','F','Y','X'),'HE3' :('M','W','X'), 'HE21':('Q','X'),'HE22':('Q','X'), 'HE#' :('K','M','X'),'HE2#':('Q','X'),'HE*' :('Y','F','X'), 'CZ' :('F','R','Y','X'),'CZ2':('W','X'),'CZ3':('W','X'),'NZ' :('K','X'), 'HZ' :('F','X'),'HZ1':('K','X'),'HZ2':('K','W','X'),'HZ3':('K','W','X'),'HZ#':('K','X'), 'CH2' :('W','X'),'NH1' :('R','X'),'NH2' :('R','X'), 'HH' :('Y','X'),'HH2' :('W','X'),'HH11':('R','X'),'HH12':('R','X'),'HH21':('R','X'),'HH22':('R','X'), 'HH1#':('R','X'),'HH2#':('R','X'),'HH*' :('R','X'), } self.H_12 = {'HG*' :('V','X'), 'CG*' :('V','X'), 'HD2#':('N','X'), 'HD*':('Y','F','L','X'), 'CD*':('Y','F','L','X'), 'HE2#':('Q','X'), 'HE*':('Y','F','X') , 'CE*':('Y','F','X'), 'HH1#':('R','X'), 'HH2#':('R','X')} self.H_23 = {'HA#':('G','X'), 'HB#':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y','X'), 'HG#':('P','R','Q','E','K','M','X') , 'HG1#':('I','X'), 'HD#':('P','R','K','X') , 'HE#':('K','X')} self.H_123 = {'HB#' :('A','X') , 'HG1#':('V','X'), 'HG2#':('V','I','T','X'), 'HD1#':('I','L','X'), 'HD2#':('L','X'), 'HE#':('M','X') , 'HZ#':('K','X')} self.H_1212 = {'HH*':('R','X')} elif format == "UCSF": self.backbone = {'HN1':self.alphabet, 'HN2':self.alphabet, 'HN3':self.alphabet, 'HN':self.alphabet, 'HA':('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HA1':('G','X'), 'HA2':('G','X'), 'QA':('G','X')} self.sidechain = { 'HB' :('I','T','V','X'), 'HB1':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HB2':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HB3':('A','X'),'QB' :('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HG' :('L','X'),'HOG' :('S','X'),'HOG1' :('T','X'),'HSG' :('C','X'), 'HG1' :('E','K','M','P','Q','R','X'),'HG2' :('E','K','M','P','Q','R','X'), 'HG11':('I','V','X'),'HG12':('I','V','X'),'HG13':('V','X'), 'HG21':('I','T','V','X'),'HG22':('I','T','V','X'),'HG23':('I','T','V','X'), 'QG' :('E','K','M','P','Q','R','X'),'MG1' :('V','X'),'MG2' :('I','T','V','X'), 'HD1' :('F','K','P','R','W','Y','X'),'HND1':('H','X'),'HD2' :('F','H','K','P','R','Y','X'), 'HD11':('I','L','X'),'HD12':('I','L','X'),'HD13':('I','L','X'), 'HD21':('L','X'),'HD22':('L','X'),'HD23':('L','X'),'HN21':('N','X'),'HN22':('N','X'), 'QD' :('P','R','K','N','L','X'),'MD1' :('I','L','X'),'MD2' :('L','X'),'RD' :('Y','F','X'), 'HNE' :('R','X'),'HNE1':('W','X'),'HNE2':('H','X'), 'HE1' :('H','K','M','F','Y','X'),'HE2' :('K','M','F','Y','X'),'HE3' :('M','W','X'), 'HN21':('Q','X'),'HN22':('Q','X'),'QE' :('K','Q','X'),'ME' :('M','X'),'RE' :('Y','F','X'), 'HZ' :('F','X'),'HZ1' :('K','X'),'HNZ1':('K','X'),'HNZ2':('K','X'),'HNZ3':('K','X'), 'HZ2' :('W','X'),'HZ3' :('W','X'),'QZ' :('K','X'),'HOH' :('Y','X'), 'HH2' :('W','X'),'HN11':('R','X'),'HN12':('R','X'),'HN21':('R','X'),'HN22':('R','X'), 'QH1' :('R','X'),'QH2' :('R','X'),'QH' :('R','X'), } self.H_12 = {'QG' :('V','X'), 'QD' :('N','L','X'), 'RD' :('Y','F','X'), 'QE' :('Q','X'), 'RE' :('Y','F','X'), 'QH1':('R','X') , 'QH2':('R','X')} self.H_23 = {'QA':('G','X'), 'QB':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y','X'), 'QG':('P','R','Q','E','K','M','I','X'), 'QD':('P','R','K','X'), 'QE':('K','X')} self.H_1212 = {'QH':('R','X')} self.H_123 = {'QB':('A','X') , 'MG1':('V','X'), 'MG2':('I','V','T','X'), 'MD1':('I','L','X'), 'MD2':('L','X'), 'ME':('M','X') , 'QZ':('K','X')} elif format == "IUPAC": self.backbone = {'H1':self.alphabet, 'H2':self.alphabet, 'H3':self.alphabet, "H''":self.alphabet, 'O1':self.alphabet , 'O2':self.alphabet, 'HN':self.alphabet , 'C':self.alphabet , 'CA':self.alphabet , 'N':self.alphabet, 'O':self.alphabet, 'HA':('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HA2':('G','X'), 'HA3':('G','X'), 'QA':('G','X')} self.sidechain = { 'CB' :('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HB' :('I','T','V','X'),'HB1':('A','X'), 'HB2':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HB3':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'QB' :('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'CG' :('D','E','F','H','K','L','M','N','P','Q','R','W','Y','X'), 'CG1' :('I','V','X'),'CG2' :('I','T','V','X'), 'HG' :('C','L','S','X'),'HG2' :('E','K','M','P','Q','R','X'),'HG3' :('E','K','M','P','Q','R','X'), 'HG11':('T','V','X'),'HG12':('I','V','X'),'HG13':('I','V','X'), 'HG21':('I','T','V','X'),'HG22':('I','T','V','X'),'HG23':('I','T','V','X'), 'QG' :('E','K','M','P','Q','R','V','X'),'MG' :('I','T','X'),'MG1' :('V','X'),'MG2' :('V','X'), 'CD' :('E','K','P','Q','R','X'),'CD1' :('F','I','L','W','Y','X'),'CD2' :('F','H','L','W','Y','X'), 'ND1' :('H','X'),'ND2' :('N','X'), 'HD1' :('F','H','W','Y','X'),'HD2' :('F','H','K','P','R','Y','X'),'HD3' :('K','P','R','X'), 'HD11':('I','L','X'),'HD12':('I','L','X'),'HD13':('I','L','X'), 'HD21':('L','N','X'),'HD22':('L','N','X'),'HD23':('L','X'), 'QD' :('P','R','K','N','L','Y','F','X'),'QR' :('Y','F','X'), 'MD' :('I','X'),'MD1' :('L','X'),'MD2' :('L','X'), 'CE' :('K','M','X'),'CE1' :('F','Y','H','X'),'CE2' :('F','W','Y','X'),'CE3' :('W','X'), 'CE*' :('F','Y','X'),'NE' :('R','X'),'NE1' :('W','X'),'NE2' :('Q','H','X'),'HE' :('R','X'), 'HE1' :('H','M','F','Y','W','X'),'HE2' :('H','K','M','F','Y','X'),'HE3' :('K','M','W','X'), 'HE21':('Q','X'),'HE22':('Q','X'),'QE' :('K','Q','Y','F','X'),'ME' :('M','X'), 'CZ' :('F','R','Y','X'),'CZ2':('W','X'),'CZ3':('W','X'),'NZ' :('K','X'), 'HZ' :('F','X'),'HZ1' :('K','X'),'HZ2' :('K','W','X'),'HZ3' :('K','W','X'),'QZ' :('K','X'), 'CH2' :('W','X'),'NH1' :('R','X'),'NH2' :('R','X'),'HH' :('Y','X'),'HH2' :('W','X'), 'HH11':('R','X'),'HH12':('R','X'),'HH21':('R','X'),'HH22':('R','X'), 'QH1' :('R','X'),'QH2' :('R','X'),'QH' :('R','X'), } self.H_12 = {'QG' :'V', 'QD' :('N','L','Y','F','X'), 'QE' :('Q','Y','F','X'), 'QH1':('R','X'), 'QH2':('R','X')} self.H_23 = {'QA':('G','X'), 'QB':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y','X'), 'QG':('P','R','Q','E','K','M','I','X'), 'QD':('P','R','K','X'), 'QE':('K','X')} self.H_1212 = {'QH':('R','X')} self.H_123 = {'QB' :('A','X'), 'MG' :('I','T','X'), 'MG1':('V','X'), 'MG2':('V','X'), 'MD1':('L','X'), 'MD2':('L','X') , 'ME' :('M','X'), 'QZ' :('K','X')} elif format == "BMRB": self.backbone = {'H1':self.alphabet, 'H2':self.alphabet, 'H3':self.alphabet, "H''":self.alphabet, 'O1':self.alphabet , 'O2':self.alphabet, 'HN':self.alphabet , 'C':self.alphabet , 'CA':self.alphabet , 'N':self.alphabet, 'O':self.alphabet, 'HA':('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HA2':('G','X'), 'HA3':('G','X')} self.sidechain = { 'CB' :('A','C','D','E','F','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'), 'HB' :('I','T','V','X'),'HB1':('A','X'), 'HB2':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'), 'HB3':('A','C','D','E','F','H','K','L','M','N','P','Q','R','S','W','Y','X'),'HB' :('A','X'), 'CG' :('D','E','F','H','K','L','M','N','P','Q','R','W','Y','X'),'CG1' :('I','V','X'),'CG2' :('I','T','V','X'), 'HG' :('C','L','S','X'),'HG2' :('E','K','M','P','Q','R','X'),'HG3' :('E','K','M','P','Q','R','X'), 'HG11':('T','V','X'),'HG12':('I','V','X'),'HG13':('I','V','X'), 'HG21':('I','T','V','X'),'HG22':('I','T','V','X'),'HG23':('I','T','V','X'), 'HG1' :('V','X'),'HG2' :('I','T','V','X'), 'CD' :('E','K','P','Q','R','X'),'CD1' :('F','I','L','W','Y','X'),'CD2' :('F','H','L','W','Y','X'), 'ND1' :('H','X'),'ND2' :('N','X'), 'HD1' :('F','H','W','Y','X'),'HD2' :('F','H','K','P','R','Y','X'),'HD3' :('K','P','R','X'), 'HD11':('I','L','X'),'HD12':('I','L','X'),'HD13':('I','L','X'), 'HD21':('L','N','X'),'HD22':('L','N','X'),'HD23':('L','X'), 'HD1' :('I','L','X'),'HD2' :('L','X'), 'CE' :('K','M','X'),'CE1' :('F','Y','H','X'),'CE2' :('F','W','Y','X'),'CE3' :('W','X'),'CE*' :('F','Y','X'), 'NE' :('R','X'),'NE1' :('W','X'),'NE2' :('Q','H','X'), 'HE' :('R','X'),'HE1' :('H','M','F','Y','W','X'),'HE2' :('H','K','M','F','Y','X'),'HE3' :('K','M','W','X'), 'HE21':('Q','X'),'HE22':('Q','X'),'HE' :('M','X'), 'CZ' :('F','R','Y','X'),'CZ2':('W','X'),'CZ3':('W','X'),'NZ' :('K','X'), 'HZ' :('F','X'),'HZ1' :('K','X'),'HZ2' :('K','W','X'),'HZ3' :('K','W','X'),'HZ' :('K','X'), 'CH2' :('W','X'),'NH1' :('R','X'),'NH2' :('R','X'),'HH' :('Y','X'), 'HH2' :('W','X'),'HH11':('R','X'),'HH12':('R','X'),'HH21':('R','X'),'HH22':('R','X'), } self.H_12 = {'QG':'V' , 'QD' :('N','L','Y','F','X'), 'QE' :('Q','Y','F','X'), 'QH1':('R','X') , 'QH2':('R','X')} self.H_23 = {'QA':('G','X'), 'QB':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y','X'), 'QG':('P','R','Q','E','K','M','I','X'), 'QD':('P','R','K','X'), 'QE':('K','X')} self.H_1212 = {'QH':('R','X')} self.H_123 = {'QB' :('A','X'), 'MG' :('I','T','X'), 'MG1':('V','X'), 'MG2':('V','X'), 'MD1':('L','X'), 'MD2':('L','X') , 'ME' :('M','X'), 'QZ' :('K','X')} #=============================================================================== class InfoShift : #============================================================= """Class describing an atom""" def __init__(self, atom='X', rank=0, res='X', shift=0.0, sd=0.0, assign=0, ambig=0, state='not foud', redundancy=[]): self.res = res self.rank = rank self.atom = atom self.nuc = atom[0] self.shift = shift self.sd = sd self.assign = assign self.state = state self.ambiguity = ambig self.redundancy = redundancy #=============================================================================== class ResidueDict(UserDict) : #================================================= """Class describing an amino acid""" def __init__(self) : #______________________________________________________ UserDict.__init__(self) def setAtom(self, rank=0, atom='X', res='X', shift=0.0, sd=0.0, assign=0, ambig=0, state='not found', redundancy=[]): if self.get(atom, None) and self[atom].redundancy == []: self[atom].redundancy = [(self[atom].shift, self[atom].sd, self[atom].assign), (shift, sd, assign)] elif self.get(atom, None) and self[atom].redundancy != []: self[atom].redundancy = self[atom].redundancy + [(shift, sd, assign)] else : self.setdefault(atom, InfoShift(rank = rank , atom = atom, res = res , shift = shift, sd = sd , assign = assign, ambig = ambig, state =state, redundancy = [])) def expand_pseudoatom(self, lex) : #________________________________________ for atom in self.keys(): rank = self[atom].rank residue = self[atom].res name = self[atom].atom shift,sd, assg = self[atom].shift, self[atom].sd, self[atom].assign tmp = re.sub('^[QM]', 'H', re.sub('#', '', re.sub('\*', '',name))) if lex.H_23.has_key(name) and residue in lex.H_23[name] : self.setAtom(rank, tmp+'2', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'3', residue, shift, sd, assg, 1) del self[atom] elif lex.H_12.has_key(name) and residue in lex.H_12[name] : self.setAtom(rank, tmp+'1', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'2', residue, shift, sd, assg, 1) del self[atom] elif lex.H_1212.has_key(name) and residue in lex.H_12_12[name] : self.setAtom(rank, tmp+'11', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'12', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'21', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'22', residue, shift, sd, assg, 1) del self[atom] def convert2bmrb(self, lex, format) : #_____________________________________ for atom in self.keys(): rank = self[atom].rank residue = self[atom].res name = self[atom].atom shift,sd, assg = self[atom].shift, self[atom].sd, self[atom].assign tmp = re.sub('^[QM]', 'H', re.sub('#', '', re.sub('\*', '',name))) if lex.H_123.has_key(name) and residue in lex.H_123[name] and format not in ('IUPAC', 'BMRB'): self.setAtom(rank, tmp, residue, shift, sd, assg, 1) del self[atom] elif lex.H_123.has_key(name) and residue in lex.H_123[name] and format == 'IUPAC': if name == 'MG' and residue in ('I', 'T'): self.setAtom(rank, 'HG2', residue, shift, sd, assg, 1) del self[atom] else : self.setAtom(rank, tmp, residue, shift, sd, assg, 1) del self[atom] #=============================================================================== class ShiftDict(UserDict) : #=================================================== """Class describing an amino acid sequence""" def __init__(self, seq_len): #______________________________________________ UserDict.__init__(self) for i in range(1, seq_len+1) : self.setdefault(i, ResidueDict()) def setResidue(self, rank=0, atom='X', res='X', shift=0.0, sd=0.0, assign=0, state='not found'): self[rank].setAtom(rank = rank , atom = atom, res = res , shift = shift, sd = sd , assign = assign, state = state, redundancy = []) def fixRedundant(self, selected): #_________________________________________ for values in selected : rank = int(values[0]) res = values[1] shift = float(values[2]) sd = float(values[3]) assign = int(values[4]) if self.has_key(rank) and self[rank].has_key(res) : self[rank][res].shift = shift self[rank][res].sd = sd self[rank][res].assign = assign self[rank][res].redundancy = [] def remove(self, selected): #_______________________________________________ for values in selected : rank = int(values[0]) atom = values[1] state = values[-1] if state == 'r' and self.has_key(rank) and self[rank].has_key(atom): del self[rank][atom] elif self.get(rank,None) is None : del self[rank] def duplicatedAtom(self): #_________________________________________________ duplicated = [] for rank in self.keys() : for atom in self[rank].keys() : if self[rank][atom].redundancy != [] : duplicated.append(self[rank][atom]) return duplicated def upperThanThreshold(self, threshold): #__________________________________ deviant = [] for rank in self.keys(): for shift in self[rank].values(): if shift.nuc == "C" and shift.sd >= threshold['C'] : deviant.append(shift) elif shift.nuc == "H" and shift.sd >= threshold['H'] : deviant.append(shift) elif shift.nuc == "N" and shift.sd >= threshold['N'] : deviant.append(shift) return deviant def checkNomenclature(self, lexic): #_______________________________________ bad_nomenclature = [] for rank in self.keys(): for shift in self[rank].values(): if lexic.backbone.has_key(shift.atom) and shift.res not in lexic.backbone.get(shift.atom, ()): bad_nomenclature.append(shift) elif lexic.sidechain.has_key(shift.atom) and shift.res not in lexic.sidechain.get(shift.atom, ()) : bad_nomenclature.append(shift) return bad_nomenclature def convert2IUPAC(self, old_atom, nomenclature): #__________________________ residue = old_atom.res atom = old_atom.atom if nomenclature == "XPLOR": #N- and C-terminal atoms if atom == 'HN' : atom = 'H' elif re.match('[OH]T[123]', atom) : atom = re.sub('T', '', atom) #converting HB2 in HB3 and HB1 in HB2 (stereospecificity is conserved) if residue in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB2' : atom = 'HB3' elif residue in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB1' : atom = 'HB2' elif residue in ('P','R','Q','E','K','M') and atom == 'HG2' : atom = 'HG3' elif residue in ('P','R','Q','E','K','M') and atom == 'HG1' : atom = 'HG2' elif residue in ('P','R','K') and atom == 'HD2' : atom = 'HD3' elif residue in ('P','R','K') and atom == 'HD1' : atom = 'HD2' elif residue == 'K' and atom == 'HE2' : atom = 'HE3' elif residue == 'K' and atom == 'HE1' : atom = 'HE2' elif residue == 'G' and atom == 'HA2' : atom = 'HA3' elif residue == 'G' and atom == 'HA1' : atom = 'HA2' elif residue == 'I' and atom == 'HG12': atom = 'HG13' elif residue == 'I' and atom == 'HG11': atom = 'HG12' if nomenclature == "UCSF": #N- and C-terminal atoms and H link to N, S, or O if re.match('HN[DEZ]?[123]*', atom) : atom = re.sub('N', '', atom) if re.match('H[OS]G1?', atom) : atom = re.sub('[OS]', '', atom) #converting HB2 in HB3 and HB1 in HB2 (stereospecificity is conserved) if residue in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB2' : atom = 'HB3' elif residue in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB1' : atom = 'HB2' elif residue in ('P','R','Q','E','K','M') and atom == 'HG2' : atom = 'HG3' elif residue in ('P','R','Q','E','K','M') and atom == 'HG1' : atom = 'HG2' elif residue in ('P','R','K') and atom == 'HD2' : atom = 'HD3' elif residue in ('P','R','K') and atom == 'HD1' : atom = 'HD2' elif residue == 'K' and atom == 'HE2' : atom = 'HE3' elif residue == 'K' and atom == 'HE1' : atom = 'HE2' elif residue == 'G' and atom == 'HA2' : atom = 'HA3' elif residue == 'G' and atom == 'HA1' : atom = 'HA2' elif residue == 'I' and atom == 'HG12': atom = 'HG13' elif residue == 'I' and atom == 'HG11': atom = 'HG12' return atom #=[ FILE/LIST/DICTIONARY PARSING METHODS ]====================================== #=============================================================================== def read_STAR(filename) : #===================================================== """Function used to parse an BMRB-STAR template file""" sequence = "" end = 0 try: input = open(filename, 'r') except: sys.stderr.write("\nCan't open NMR STAR template file ... check %s\n"%filename) return sequence, ShiftDict(len(sequence)) #SEQUENCE RETRIEVAL IN THE STAR FILE ======================================= #sequence is firstly search ino the file while end == 0 : line = input.readline() if not line: break if line == "_Mol_residue_sequence\n" : line = input.readline() sequence += input.readline().strip() while ";" not in sequence : sequence += input.readline().strip() sequence = sequence[:-1] end = 1 #INITIALIZATION OF THE DICTIONNARY AND NMR STAR FILE READING =============== #initialization star_dict = ShiftDict(len(sequence)) input.seek(0) #NMR STAR reading while 1: line = input.readline() if not line: break #NMR STAR format #Atom Residue #shift Seq Residue Atom Atom Shift/ Error/ Ambiguity #assign code Label Name Type ppm ppm Code #--------------------------------------------------------------- #1 2 GLY H H @ @ @ #2 2 GLY HA2 H @ @ @ #3 2 GLY HA3 H @ @ @ #4 2 GLY C C @ @ @ #5 2 GLY CA C @ @ @ #6 2 GLY N N @ @ @ while re.match('\d+\s+\d+\s+[A-Z]{3}', line) : line = line.split() rank = int(line[1]) res = line[2] atom = line[3] star_dict.setResidue(rank, atom, res, 0.0, 0.0, 0) line = input.readline() if not line: break input.close() return star_dict, sequence #=============================================================================== def split_STAR(project, condition, filename) : #================================= """Function used to retrieved NMR-STAR template file header and footer""" try : input = open(filename, 'r') except: sys.stderr.write("\nCan't open NMR STAR template file ... check %s\n"%filename) return sequence, ShiftDict(len(sequence)) header = [] footer = [] head = 1 foot = 0 end = 1 while end == 1 : line = input.readline() if not line: break while re.match('\d+\s+\d+\s+[A-Z]{3}', line) : head = 0 foot = 1 line = input.readline() if not line: break if head == 1 and foot == 0 : header.append(line) elif head == 0 and foot == 1 : footer.append(line) input.close() #Added Comments header.insert(-7, "# === FILE GENERATED FROM THE \"BMRB submission form creation\" SPARKY EXTENSION ===\n") header.insert(-7, "# > Some bug are still possible : don't forget to check atom names!\n") header.insert(-7, "# > Chemical shift list generated from the following sparky project :\n") header.insert(-7, "# - project : %s\n"%project.save_path) header.insert(-7, "# - condition : %s\n"%condition.name) header.insert(-7, "# - spectrum list : \n") for spectrum in project.spectrum_list(): header.insert(-7, "# * %s from <%s>\n"%(spectrum.name, spectrum.data_path)) header.insert(-7, "\n") return header, footer #=============================================================================== def ResonanceList2ShiftDict(resonance_list = None) : #================================= """Function used to parse a Sparky chemical shift list into a ResidueDict object""" seq_dict = {} seq_string = '' rank_max = 0 try : resonance_list = filter(lambda x : (x.atom.name and x.atom.name != '?' ), resonance_list) resonance_list = filter(lambda x : (x.group.number), resonance_list) resonance_list = filter(lambda x : (x.peak_count != 0), resonance_list) except : return seq_string, ShiftDict(len(seq_string)) for resonance in resonance_list : rank = resonance.group.number res = resonance.group.symbol.upper() if rank > rank_max : rank_max = rank seq_dict[rank] = seq_dict.get(rank, res) for i in range(1, rank_max+1) : seq_string += seq_dict.get(i,'X') shift_dict = ShiftDict(len(seq_string)) for resonance in resonance_list : rank = resonance.group.number res = resonance.group.symbol.upper() atm = resonance.atom.name.upper() shift = resonance.frequency sd = resonance.deviation assign = resonance.peak_count if res == None or res == '?' or res == '' : res = 'X' elif re.match('[A-Z]{1}', res) and not one2three_letter_code.has_key(res) : res = 'X' elif re.match('[A-Z]{3}\d+', res) : res = three2one_letter_code.get(res, 'X') shift_dict.setResidue(rank=rank, atom=atm, res=res, shift=shift, sd=sd, assign=assign) return seq_string, shift_dict #=============================================================================== def assign_ambiguity(res_dict): #=============================================== """Function used to assign a BMRB ambiguity code""" for atom in res_dict.keys(): res = res_dict[atom].res atm = res_dict[atom].atom if atm in ('H1','H2','H3','O1','O2') : res_dict[atom].ambiguity = 2 elif ambiguity2_CH.has_key(atm) and res in ambiguity2_CH[atm] : res_dict[atom].ambiguity = 2 elif ambiguity2_ONH.has_key(atm) and res == ambiguity2_ONH[atm] : res_dict[atom].ambiguity = 2 elif ambiguity3.has_key(atm) and res in ambiguity3[atm] : res_dict[atom].ambiguity = 3 elif ambiguity4.has_key(atm) and res == ambiguity4[atm] : res_dict[atom].ambiguity = 4 else : res_dict[atom].ambiguity = 1 #=============================================================================== def atom_retrieval(user_resdict, temp_resdict): #========================================= """Function used to retrieved atoms of a user residue dictionary into a template residue dictionary""" found = 0 #search atom from the user defined residue into the template for atom in user_resdict.keys(): atm_name = user_resdict[atom].atom if temp_resdict.has_key(atm_name) : user_resdict[atom].state = 'found' else : user_resdict[atom].state = 'not found' #search atom from the template defined residue into the user defined residue for atom_temp in temp_resdict.keys(): atm_name_temp = temp_resdict[atom_temp].atom short_atm_name_temp = re.sub('\d$', '', atm_name_temp) found = 0 #search in user residue_dict>atom names for atom_user in user_resdict.keys() : atm_name_user = user_resdict[atom_user].atom if atm_name_user == atm_name_temp or atm_name_user == atm_name_temp : found = 1 #pass #if tha atom name is not found, you have to add it! if found == 0 : user_resdict.setAtom(rank = temp_resdict[atom_temp].rank, atom = temp_resdict[atom_temp].atom, res = three2one_letter_code[temp_resdict[atom_temp].res], shift = 0.0, sd = 0.0, assign = 0, ambig = 1, state = 'added', redundancy=[]) #=[ USER INTERFACE SUBROUTINE ]================================================= def show_dialog(session): d = sputil.the_dialog(sparky2bmrb_Dialog, session) d.show_window(1)