Source code for rna_tools.tools.renum_pdb_to_aln.renum_pdb_to_aln

#!/usr/bin/env python
"""renum_pdb_to_aln.py - renumber a pdb file based on the alignment.

author: A. Zyla under supervision of mmagnus

.. warning:: works only for single chain! and requires Biopython (tested with v1.68)
"""

import logging
import argparse
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from Bio.PDB import PDBParser
from Bio.PDB import PDBIO
from Bio.PDB.Atom import PDBConstructionWarning
import warnings
warnings.simplefilter('ignore', PDBConstructionWarning)

# logger
logger = logging.getLogger()
handler = logging.StreamHandler()
logger.addHandler(handler)


[docs] def get_seq(alignfn, seqid): """Get seq from an alignment with gaps. Args: alignfn (str): a path to an alignment seqid (str): seq id in an alignment Usage:: >>> get_seq('test_data/ALN_OBJ1_OBJ2.fa', 'obj1') SeqRecord(seq=SeqRecord(seq=Seq('GUUCAG-------------------UGAC-', SingleLetterAlphabet()), id='obj1', name='obj1', description='obj1', dbxrefs=[]), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[]) Returns: SeqRecord """ # alignment = AlignIO.read(alignfn, 'fasta') alignment = SeqIO.index(alignfn, 'fasta') # print SeqRecord(alignment[seqid]) sequence = SeqRecord(alignment[seqid]) return sequence
[docs] def open_pdb(pdbfn): """Open pdb with Biopython. Args: pdbfn (str): a path to a pdb structure Returns: PDB Biopython object: with a pdb structure """ parser = PDBParser() return parser.get_structure('struc', pdbfn)
[docs] def renumber(seq_with_gaps, struc, residue_index_start): """Renumber a pdb file. Args: seq_with_gaps (str): a target sequence extracted from the alignment struc (pdb): a structure residue_index_start (int): starting number Returns: BioPython Structure object """ new_numbering = [] for nt in seq_with_gaps: if nt != '-': nt_num_a = [residue_index_start, nt] new_numbering.append(residue_index_start) logger.info(nt_num_a) residue_index_start = residue_index_start + 1 logger.info(new_numbering) # works only for single chain for struc in pdb: for chain in struc: for residue, resi in zip(chain, new_numbering): residue.id = (residue.id[0], resi, residue.id[2]) return struc
[docs] def write_struc(struc, outfn): """Write renumbered pdb with Biopython. Args: struc (pdb): a renumbered structure outfn (str): a path to a new, renumbered pdb file Returns: none: writes to a file """ io = PDBIO() io.set_structure(struc) io.save(outfn) logger.info('Structure written to %s' % outfn)
[docs] def get_parser(): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") parser.add_argument("--residue_index_start", help="renumber starting number (default: 1)", default=1, type=int) parser.add_argument("--outfn", help="output pdb file (default: pdbfn .pdb -> _out.pdb)") parser.add_argument("seqid", help="seq id in the alignemnt") parser.add_argument("alignfn", help="alignemnt in the Fasta format") parser.add_argument("pdbfn", help="pdb file") return parser
# main if __name__ == '__main__': args = get_parser().parse_args() if args.verbose: logger.setLevel(logging.INFO) if not args.outfn: args.outfn = args.pdbfn.replace('.pdb', '_out.pdb') seq_with_gaps = get_seq(args.alignfn, args.seqid) pdb = open_pdb(args.pdbfn) struc = renumber(seq_with_gaps, pdb, args.residue_index_start) write_struc(struc, args.outfn)