#! /bin/env python import string, sys, os, getopt, re from UserDict import UserDict usage_header = """ __oOo_(^_^)_oOo____ Version 1.0 (sept 2004) - provided by Nicolas Sapay (n.sapay@ibcp.fr) ____ SYNOPSIS : ========== sparky2bmrb is a python script providing methods to convert a Sparky chemical shift list in a NMR-STAR Data file compatible with the BMRB submission system. sparky2bmrb needs an NMR-STAR template file and a Spaky chemical shift list. Template file can be found at [http://www.bmrb.wisc.edu/elec_dep/gen_aa.html]. Sparky chemical shift list can be generate from Sparky using the commande (). """ usage = """ USAGE : ======= sparky2bmrb.py [OPTION] OPTION : ======== -h|--help Print this usage and option -i|-a Execution mode, i.e. interactive (<-i>) or automatic (<-a>). Default is interactive. If automatic, minimum question will be ask to the user. --[iupac|xplor|ucsf] Atom nomenclature used in the Sparky chemical shift list. Default is xplor. --[H|C|N|O|S|P] Threshold value used to detect and remove resonnaces with large standard-deviation. Default is 0.005 for H nucleus, 0.1 for C nucleus and 0.01 for N and O nucleus -o|--[output|out] Name of the output file. If not define, Sparky chemical shift list file name will be used with the .bmrb extension. -l|--log Name of the file where remaining_data are written. those data correspond to chemical shifts excluded from the list during the process. Default is "remaining_data.log" EXAMPLES : ========== The basic commande line : sparky2bmrb.py template.star myshift.shifts Force the format to be xplor, set the C nucleus threshold to 0.2 ppm and set the outfile to "myfile.bmrb" : sparky2bmrb.py --xplor --C=0.2 -o myfile.bmrb template.star myshift.shifts """ #=[ OPTION PROCESSING ]========================================================= interactive = "i" format = "xplor" threshold = {'C' : 0.1, 'H' : 0.005, 'N' : 0.01, 'O' : 0.01, 'P' : 0.01, 'S' : 0.01} bmrb_file = "" log_file = "" try : opts, args = getopt.getopt(sys.argv[1:], "hiao:l:", ["help", "iupac", "xplor", "ucsf", "H=", "C=", "N=", "O=", "S=", "P=", "log=", "output=", "out="]) except : print usage_header, usage sys.exit(0) if len(sys.argv) < 2 : print usage_header, usage sys.exit(0) for o, a in opts: if o in ("-h", "--help"): print usage sys.exit(0) elif o == "-i" : interactive = "i" elif o == "-a" : interactive = "a" elif o == "--iupac" : format = "iupac" elif o == "--xplor" : format = "xplor" elif o == "--ucsf" : format = "uscf" elif o == "--iupac" : format = "iupac" elif o == "--H" : threshold['H'] = float(a) elif o == "--C" : threshold['C'] = float(a) elif o == "--N" : threshold['N'] = float(a) elif o == "--O" : threshold['O'] = float(a) elif o == "--S" : threshold['P'] = float(a) elif o == "--P" : threshold['P'] = float(a) elif o in ("-o", "--output", "--out"): bmrb_file = str(a) elif o in ("-l", "--log") : log_file = str(a) #=[ FILES PROCESSING ]==================================================== shift_file = sys.argv[-1] try: open(shift_file, 'r') except: sys.stderr.write("\n(+_+)~ can't open Sparky chemical shifts list ..... check %s\n" % shift_file) exit(0) star_file = sys.argv[-2] try: open(star_file, 'r') except: sys.stderr.write("\n(+_+)~ can't open NMR-STAR template file ..... check %s\n" % star_file) exit(0) if bmrb_file == "" : bmrb_file = shift_file.split(".")[0]+".bmrb" try: open(bmrb_file, 'w') except: sys.stderr.write("\n(+_+)~ can't open BMRB output file ..... check %s\n" % bmrb_file) exit(0) if log_file == "" : log_file = "remaining_data.log" try: open(log_file, 'w') except: sys.stderr.write("\n(+_+)~ can't open log file ..... check %s\n" % log_file) exit(0) #=[ ALPHABET ]================================================================== one2three_letter_code = { 'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU', 'F': 'PHE', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'K': 'LYS', 'L': 'LEU', 'M': 'MET', 'N': 'ASN', 'P': 'PRO', 'Q': 'GLN', 'R': 'ARG', 'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'} three2one_letter_code = { 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R', 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'} #=[ AMBIGUITY LEXICAL RULES BASED ON IUPAC NOMENCLATURE]======================== ambiguity2_CH = {'HA2' :('G'), 'HA3' :('G'), 'HB1' :('A'), 'HB2' :('A','P','R','N','D','C','Q','E','H','L','K','M','F','S','W','Y'), 'HB3' :('A','P','R','N','D','C','Q','E','H','L','K','M','F','S','W','Y'), 'HG2' :('P','R','Q','E','K','M'), 'HG3' :('P','R','Q','E','K','M'), 'HG11':('V') , 'HG12':('V','I') , 'HG13':('V','I'), 'HG21':('I','T','V') , 'HG22':('I','T','V'), 'HG23':('I','T','V'), 'HD2' :('P','R','K') , 'HD3' :('P','R','K'), 'HD11':('I','L') , 'HD12':('I','L') , 'HD13':('I','L'), 'HD21':('L') , 'HD22':('L') , 'HD23':('L'), 'HE1' :('M') , 'HE2' :('K','M') , 'HE3' :('K','M'), 'CG1' :('V') , 'CG2' :('V') , 'CD1' :('L') , 'CD2' :('L')} ambiguity2_ONH = {'NH1':'R', 'NH2':'R', 'HZ1' :'K', 'HZ2' :'K', 'HZ3' :'K', 'HD21':'N', 'HD22':'N', 'HE21':'Q', 'HE22':'Q'} ambiguity3 = {'HD1':('F','Y'), 'HD2':('F','Y'), 'CD1':('F'), 'CD2':('F','Y'), 'HE1':('F','Y'), 'HE2':('F','Y'), 'CE1':('F'), 'CE2':('F','Y')} ambiguity4 = {'HH11':'R', 'HH12':'R', 'HH21':'R', 'HH22':'R'} #=[ NOMENCLATURE SPECIFIC LEXICAL RULES ]======================================= class UserLex(UserDict): def __init__(self, format): UserDict.__init__(self) if format == "xplor": self.H_12 = {'HG*' :('V'), 'CG*' :('V'), 'HD2#':('N'), 'HD*':('Y','F','L'), 'CD*':('Y','F'), 'HE2#':('Q'), 'HE*':('Y','F') , 'CE*':('Y','F'), 'HH1#':('R'), 'HH2#':('R')} self.H_23 = {'HA#':('G'), 'HB#':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y'), 'HG#':('P','R','Q','E','K','M') , 'HG1#':('I'), 'HD#':('P','R','K') , 'HE#':('K')} self.pseudoatom = {'HB#':('A') , 'HG1#':('V'), 'HG2#':('V','I','T'), 'HD1#':('I','L'), 'HD2#':('L'), 'HE#':('M') , 'HZ#':('K')} self.H_1212 = {'HH*':('R')} elif format == "ucsf": self.H_12 = {'QG' :('V'), 'QD' :('N','L'), 'RD' :('Y','F'), 'QE' :('Q'), 'RE' :('Y','F'), 'QH1':('R') , 'QH2':('R')} self.H_23 = {'QA':('G'), 'QB':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y'), 'QG':('P','R','Q','E','K','M','I'), 'QD':('P','R','K'), 'QE':('K')} self.H_1212 = {'QH':('R')} self.pseudoatom = {'QB':('A') , 'MG1':('V'), 'MG2':('I','V','T'), 'MD1':('I','L'), 'MD2':('L'), 'ME':('M') , 'QZ':('K')} elif format == "iupac": self.H_12 = {'QG':'V' , 'QD' :('N','L'), 'QD' :('Y','F'), 'QE' :('Q'), 'QE' :('Y','F'), 'QH1':('R') , 'QH2':('R')} self.H_23 = {'QA':('G'), 'QB':('P','R','N','D','C','Q','E','H','L','K','M','F','S','T','W','Y'), 'QG':('P','R','Q','E','K','M','I'), 'QD':('P','R','K'), 'QE':('K')} self.H_1212 = {'QH':('R')} self.pseudoatom = {'QB' :('A'), 'MG' :('I','T'), 'MG1':('V'), 'MG2':('V'), 'MD1':('L'), 'MD2':('L') , 'ME' :('M'), 'QZ' :('K')} #=[ CLASS DEFINITIONS ]========================================================= class InfoShift : """Class describing an atom and its chemical shift""" def __init__(self, atom= 'X', rank=0, res= 'X', shift= 0.0, sd= 0.0, assignment= 0, ambig=0, state='n'): self.res = res self.rank = rank self.atom = atom self.nuc = atom[0] self.shift = shift self.sd = sd self.assign = assignment self.state = 'n' #n: not found, f:found, a:add to the list self.ambiguity = ambig class ResidueDict(UserDict) : """Class describing atoms of a residue""" def __init__(self) : UserDict.__init__(self) def setAtom(self, rank=0, atom='X', res='X', shift=0.0, sd=0.0, assign=0, ambig=0, state='n'): if self.get(atom, None) : sys.stderr.write(" [ %s%d-%s ] :: atom duplication in residue %s%s\n"%(res,rank,atom,res,rank)) choice = raw_input("\t- use 1)%s%d-%s= %.3f +/-%.3f ppm or 2)%s%d-%s= %.3f +/-%.3f ppm? (1/2) "%\ (self[atom].res, self[atom].rank, self[atom].atom, self[atom].shift, self[atom].sd, res, rank, atom, shift, sd)) if choice == "2" : self[atom] = InfoShift(atom, rank, res, shift, sd, assign, ambig, state) else : self.setdefault(atom, InfoShift(atom, rank, res, shift, sd, assign, ambig, state)) def expand_pseudoatom(self, lex) : residue = self.values()[0].res rank = self.values()[0].rank for atom in self.keys(): shift,sd, assg = self[atom].shift, self[atom].sd, self[atom].assign tmp = re.sub('^Q', 'H', re.sub('#', '', re.sub('\*', '',atom))) if lex.H_23.has_key(atom) and residue in lex.H_23[atom] : self.setAtom(rank, tmp+'2', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'3', residue, shift, sd, assg, 1) del self[atom] elif lex.H_12.has_key(atom) and residue in lex.H_12[atom] : self.setAtom(rank, tmp+'1', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'2', residue, shift, sd, assg, 1) del self[atom] elif lex.H_1212.has_key(atom) and residue in lex.H_12_12[atom] : self.setAtom(rank, tmp+'11', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'12', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'21', residue, shift, sd, assg, 1) self.setAtom(rank, tmp+'22', residue, shift, sd, assg, 1) del self[atom] def convert2bmrb(self, lex, format) : residue = self.values()[0].res rank = self.values()[0].rank for atom in self.keys(): shift,sd, assg = self[atom].shift, self[atom].sd, self[atom].assign tmp = re.sub('^Q', 'H', re.sub('#', '', re.sub('\*', '',atom))) if lex.pseudoatom.has_key(atom) and residue in lex.pseudoatom[atom] and format != 'iupac': self.setAtom(rank, tmp, residue, shift, sd, assg, 1) del self[atom] elif lex.pseudoatom.has_key(atom) and residue in lex.pseudoatom[atom] and format == 'iupac': if atom == 'MG' and residue in ('I', 'T'): self.setAtom(rank, 'HG2', residue, shift, sd, assg, 1) del self[atom] else : self.setAtom(rank, tmp, residue, shift, sd, assg, 1) del self[atom] class ShiftDict(UserDict) : """Class describing an amino acid sequence""" def __init__(self, seq_len): UserDict.__init__(self) for i in range(1, seq_len+1) : self.setdefault(i, ResidueDict()) def setResidue(self, rank=0, atom='X', res='X', shift=0.0, sd=0.0, assign=0, state='n'): self[rank].setAtom(rank, atom, res, shift, sd, assign, state) def read_STAR(filename) : """Function used to parse an NMR-STAR template file""" sequence = "" input = open(filename, 'r') end = 0 #SEQUENCE RETRIEVAL IN THE NMR STAR FILE #sequence is firstly search ino the file while end == 0 : line = input.readline() if not line: break if line == "_Mol_residue_sequence\n" : line = input.readline() sequence += input.readline().strip() while ";" not in sequence : sequence += input.readline().strip() sequence = sequence[:-1] end = 1 #if the sequence is not found if sequence == '': sequence = raw_input(" > Please enter your sequence in one-letter code : \n") #INITIALIZATION OF THE DICTIONNARY AND NMR STAR FILE READING #initialization star_dict = ShiftDict(len(sequence)) input.seek(0) #NMR STAR reading while 1: line = input.readline() if not line: break #NMR STAR format #Atom Residue #shift Seq Residue Atom Atom Shift/ Error/ Ambiguity #assign code Label Name Type ppm ppm Code #--------------------------------------------------------------- #1 2 GLY H H @ @ @ #2 2 GLY HA2 H @ @ @ #3 2 GLY HA3 H @ @ @ #4 2 GLY C C @ @ @ #5 2 GLY CA C @ @ @ #6 2 GLY N N @ @ @ while re.match('\d+\s+\d+\s+[A-Z]{3}', line) : line = string.split(line) rank = int(line[1]) res = line[2] atom = line[3] #if star_dict[rank].get(atom, None): # sys.stderr.write(" > %3s%4d : atom %4s duplicated in the list >> ONLY FIRST USED! <<\n"%(res, rank, atom)) #else : # star_dict[rank].setdefault(atom, shift(res=res, atom=atom, rank=rank, shift=0.0, sd=0.0, assignment=0)) star_dict.setResidue(rank, atom, res, 0.0, 0.0, 0) line = input.readline() if not line: break input.close() return star_dict, sequence def split_STAR(filename) : """Function used to retrieved NMR-STAR template file header and footer""" input = open(filename, 'r') header = [] footer = [] head = 1 foot = 0 end = 1 while end == 1 : line = input.readline() if not line: break while re.match('\d+\s+\d+\s+[A-Z]{3}', line) : head = 0 foot = 1 line = input.readline() if not line: break if head == 1 and foot == 0 : header.append(line) elif head == 0 and foot == 1 : footer.append(line) input.close() return header, footer def read_SPARKY(filename, sequence) : """Function used to parse a Sparky chemical shift list""" spark_dict = ShiftDict(len(sequence)) input = open(filename, 'r') while 1: line = input.readline() if not line: break while re.match('\s*[A-Za-z]+\s+[A-Za-z]+\s+[A-Za-z]+', line) or line.isspace() == 1 or line == '': line = input.readline() if not line: break line = string.split(line.upper()) if line[0].isdigit() : rank = int(line[0]) res = sequence[rank - 1] spark_dict.setResidue(rank, line[1], res, float(line[3]), float(line[4]), int(line[5])) elif re.match('[A-Z]\d+', line[0]) : rank = int(re.sub('[A-Z]+', '', line[0])) res = re.sub('\d+', '', line[0]) spark_dict.setResidue(rank, line[1], res, float(line[3]), float(line[4]), int(line[5])) elif re.match('[A-Z]{3}\d+', line[0]) : rank = int(re.sub('[A-Z]+', '', line[0])) res = three2one_letter_code[re.sub('\d+', '', line[0])] spark_dict.setResidue(rank, line[1], res, float(line[3]), float(line[4]), int(line[5])) else : sys.stderr.write("\n[ FILE FORMAT ERROR ].... check %s in file %s\n" % string.join(line), input.name) input.close() return spark_dict def remove_deviant(shift_dict, threshold, mode): """Function used to remove shift with large standard deviation""" deviant = {} if mode == "a" : for rank in shift_dict.keys(): for atom in shift_dict[rank].values(): if atom.nuc == "C" and atom.sd >= threshold['C']: deviant.setdefault(rank, {}).setdefault(atom.atom, atom) del shift_dict[rank][atom.atom] elif atom.nuc == "H" and atom.sd >= threshold['H']: deviant.setdefault(rank, {}).setdefault(atom.atom, atom) del shift_dict[rank][atom.atom] elif mode == "i" : for rank in shift_dict.keys(): for atom in shift_dict[rank].values(): tmp_string = "[ %s%d-%s ]" % (atom.res, atom.rank, atom.atom) atom_string = "%-15s :: %3.2f +/-%3.2f ppm assigned %dx" % \ (tmp_string, atom.shift, atom.sd, atom.assign) if atom.nuc == "C" and atom.sd >= threshold['C']: choice = raw_input("\t- %s (threshold= %.3f ppm). Remove it? (y/n) "%(atom_string, threshold['C'])) if choice in ('y', 'Y', 'yes', 'YES', 'YEs', 'Yes', 'yES', 'yeS') : deviant.setdefault(rank, {}).setdefault(atom.atom, atom) del shift_dict[rank][atom.atom] elif atom.nuc == "H" and atom.sd >= threshold['H']: choice = raw_input("\t- %s (threshold= %.3f ppm). Remove it? (y/n) "%(atom_string, threshold['H'])) if choice in ('y', 'Y', 'yes', 'YES', 'YEs', 'Yes', 'yES', 'yeS') : deviant.setdefault(rank, {}).setdefault(atom.atom, atom) del shift_dict[rank][atom.atom] return deviant #Readings of NOEs in a Sparky formated file_________________________________ def ucsf2bmrb(shift_dict) : """Function used to convert UCSF atom names in IUPAC atom names""" if re.match('HN[DEZ]?[123]*', atom) : atom = re.sub('N', '', atom) if re.match('H[OS]G1?', atom) : atom = re.sub('[OS]', '', atom) #converting HB1 in HB3 if atom.res in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB2' : atom = 'HB3' elif atom.res in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB1' : atom = 'HB2' elif residue in ('P','R','Q','E','K','M') and atom == 'HG2' : atom = 'HG3' elif residue in ('P','R','Q','E','K','M') and atom == 'HG1' : atom = 'HG2' elif residue in ('P','R','K') and atom == 'HD2' : atom = 'HD3' elif residue in ('P','R','K') and atom == 'HD1' : atom = 'HD2' elif residue == 'K' and atom == 'HE2' : atom = 'HE3' elif residue == 'K' and atom == 'HE1' : atom = 'HE2' elif residue == 'G' and atom == 'HA2' : atom = 'HA3' elif residue == 'G' and atom == 'HA1' : atom = 'HA2' elif residue == 'I' and atom == 'HG12': atom = 'HG13' elif residue == 'I' and atom == 'HG11': atom = 'HG12' return atom def xplor2bmrb(old_atom) : """Function used to convert XPLOR atom names in IUPAC atom names""" residue = old_atom.res atom = old_atom.atom #N- and C-terminal atoms if atom == 'HN' : atom = 'H' elif re.match('[OH]T[123]', atom) : atom = re.sub('T', '', atom) #converting HB1 in HB3 if residue in ('P','R','N','D','C','Q','E','H','L','M','F','S','T','W','Y') and atom == 'HB1' : atom = 'HB3' elif residue in ('P','R','Q','E','K','M') and atom == 'HG1' : atom = 'HG3' elif residue in ('P','R','K') and atom == 'HD1' : atom = 'HD3' elif residue == 'K' and atom == 'HE1' : atom = 'HE3' elif residue == 'G' and atom == 'HA1' : atom = 'HA3' elif residue == 'I' and atom == 'HG11': atom = 'HG13' return atom def assign_ambiguity(res_dict): """Function used to assign a BMRB ambiguity code""" residue = res_dict.values()[0].res for atom in res_dict.keys(): if atom in ('H1','H2','H3','O1','O2') : res_dict[atom].ambiguity = 2 elif ambiguity2_CH.has_key(atom) and residue in ambiguity2_CH[atom] : res_dict[atom].ambiguity = 2 elif ambiguity2_ONH.has_key(atom) and residue == ambiguity2_ONH[atom] : res_dict[atom].ambiguity = 2 elif ambiguity3.has_key(atom) and residue in ambiguity3[atom] : res_dict[atom].ambiguity = 3 elif ambiguity4.has_key(atom) and residue == ambiguity4[atom] : res_dict[atom].ambiguity = 4 else : res_dict[atom].ambiguity = 1 def atom_retrieval(res_dict1, res_dict2): """Function used to retrieved atoms form a Sparky list into a NMR-STAR template file""" residue1 = res_dict1.values()[0].res residue2 = three2one_letter_code.get(res_dict2.values()[0].res, 'X') if residue1 != residue2 : sys.stderr.write("\t- [ rank %4d ] :: residue is different between NMR STAR file (%s) and Sparky list (%s)"% res_dict1.values()[0].rank, residue1, residue2) for atom1 in res_dict1.keys(): if res_dict2.has_key(atom1) : res_dict1[atom1].state = 'f' else : res_dict1[atom1].state = 'a' for atom2 in res_dict2.values(): if not res_dict1.has_key(atom2.atom) : res_dict1.setAtom(atom2.rank, atom2.atom, residue2, 0.0, 0.0, 9, 'n') #=[ MAIN ]====================================================================== lexic = UserLex(format) #input file reading sys.stdout.write("\n> Template NRM STAR file reading ..............................\n") star, sequence = read_STAR(star_file) sys.stdout.write("\n> Sparky chemical shift list reading ..........................\n") sparky = read_SPARKY(shift_file, sequence) #Atom name conversion sys.stdout.write("\n> Atom name conversion to IUPAC nomenclature...................\n") if format != "iupac": sys.stdout.write("\n > translation from %s to IUPAC nomenclature\n\n" % format.upper()) for rank in sparky.keys(): for atom in sparky[rank].keys(): new_atom = '' if format == "xplor" : new_atom = xplor2bmrb(sparky[rank][atom]) if format == "ucsf" : new_atom = ucsf2bmrb(sparky[rank][atom]) if new_atom != '' and new_atom != atom : sparky.setResidue(rank, new_atom, sparky[rank][atom].res, sparky[rank][atom].shift, sparky[rank][atom].sd , sparky[rank][atom].assign) del sparky[rank][atom] else : sys.stdout.write(" > no translation to IUPAC nomenclature needed\n\n") #Ambiguity code assignment sys.stdout.write(" > ambiguity code assignment\n") sys.stdout.write("\t- default ambiguity code 2 : -CH2, -CH3, -NH2 and -NH3\n") sys.stdout.write("\t- default ambiguity code 3 : aromatic C and H from Y and F\n") sys.stdout.write("\t- default ambiguity code 4 : HH1[12], HH2[12] from R\n\n") #Atom retrival sys.stdout.write(" > atom retrieval\n") not_retrieved = 0 added = 0 for rank in sparky.keys(): assign_ambiguity(sparky[rank]) sparky[rank].expand_pseudoatom(lexic) sparky[rank].convert2bmrb(lexic, format) atom_retrieval(sparky[rank], star[rank]) for atom in [x for x in sparky[rank].values() if x.state == 'n']: not_retrieved += 1 for atom in [x for x in sparky[rank].values() if x.state == 'a']: added += 1 if not_retrieved > 0 : sys.stderr.write("\t- %d atoms defined in <%s> were not found in <%s>\n"%(not_retrieved, star_file, shift_file)) else : sys.stderr.write("\t- All atoms defined in <%s> were retrieved in <%s>\n"%(star_file, shift_file)) if added > 0 : sys.stderr.write("\t- %d atoms from <%s> were added in defined atoms\n\n"%(added, shift_file)) else : sys.stderr.write("\t- no atoms from <%s> added in defined atoms\n\n"%(shift_file)) #Large deviation removal sys.stdout.write("\n> Remove chemical shift with large standard deviation..........\n") removed = remove_deviant(sparky, threshold, interactive) sys.stdout.write("\n> Output files creation........................................\n") if interactive == 'i': choice = raw_input("\t- Add removed and not retrieved atoms into BMRB file? (y/n) ") if choice in ('y', 'Y', 'yes', 'YES', 'YEs', 'Yes', 'yES', 'yeS') : add_all = 1 else : add_all = 0 else : add_all = 0 bmrb = open(bmrb_file, 'w') if add_all == 0 : log = open(log_file, 'w') header, footer = split_STAR(star_file) idlog = 1 idbmrb = 1 if add_all == 0 : log.write("#=[ ATOMS REMOVE BECAUSE OF THEIR LARGE STANDARD DEVIATION ]===============\n") for rank in range(1, len(sequence)+1): for nuc in ('H', 'C', 'N', 'O', 'S'): for pos in ('', 'A', 'B', 'G', 'D', 'E', 'Z', 'H'): for num in ('', '1', '2', '3', '11', '12', '13', '21', '22', '23'): if removed.has_key(rank) : if removed[rank].has_key(nuc+pos+num) : atom = removed[rank][nuc+pos+num] values = ("%.2f"%atom.shift).ljust(8)+("%.2f"%atom.sd).ljust(8) log.write("%-9d%-7d%-8s%-8s%-8s%s%-d\n"%(idlog, rank, one2three_letter_code[atom.res], atom.atom, atom.nuc, values, atom.ambiguity)) idlog += 1 log.write("\n#=[ ATOM NOT FOUND IN THE SPARKY CHEMICAL SHIFT LIST ]=====================\n") for line in header : bmrb.write(line) for rank in range(1, len(sequence)+1): for nuc in ('H', 'C', 'N', 'O', 'S'): for pos in ('', 'A', 'B', 'G', 'D', 'E', 'Z', 'H'): for num in ('', '1', '2', '3', '11', '12', '13', '21', '22', '23'): atom = sparky[rank].get(nuc+pos+num, None) if atom and atom.state != 'n': values = ("%.2f"%atom.shift).ljust(8)+("%.2f"%atom.sd).ljust(8) bmrb.write("%-9d%-7d%-8s%-8s%-8s%s%-d\n"%(idbmrb, rank, one2three_letter_code[atom.res], atom.atom, atom.nuc, values, atom.ambiguity)) idbmrb += 1 elif atom and add_all == 1 and atom.state == 'n': bmrb.write("%-9d%-7d%-8s%-8s%-8s%-8s%-8s%-s #Not foud !\n"%(idbmrb, rank, one2three_letter_code[atom.res], atom.atom, atom.nuc, '@', '@','@')) idbmrb += 1 elif atom and add_all == 0 and atom.state == 'n': log.write("%-9d%-7d%-8s%-8s%-8s%-8s%-8s%-s\n"%(idlog, rank, one2three_letter_code[atom.res], atom.atom, atom.nuc, '@', '@','@')) idlog += 1 if add_all == 1 and removed.has_key(rank) : if removed[rank].has_key(nuc+pos+num) : atom = removed[rank][nuc+pos+num] values = ("%.2f"%atom.shift).ljust(8)+("%.2f"%atom.sd).ljust(8) bmrb.write("%-9d%-7d%-8s%-8s%-8s%s%-d #Large standard deviation!\n"%(idbmrb, rank, one2three_letter_code[atom.res], atom.atom, atom.nuc, values, atom.ambiguity)) idbmrb += 1 for line in footer : bmrb.write(line) bmrb.close() if add_all == 0 : log.close() sys.stdout.write("\t- NMR STAR output file created : <%s>\n"%bmrb_file) sys.stdout.write("\t- sparky2bmrb log file created : <%s>\n"%log_file) sys.stdout.write("\n")