Source code for rna_tools.tools.pdbs_measure_atom_dists.pdbs_measure_atom_dists

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""This is a quick and dirty method of comparison two RNA structures (stored in pdb files).
It measures the distance between the relevan atoms (C4') for nucleotides defined as "x" in the
sequence alignment.

author: F. Stefaniak, modified by A. Zyla,  supervision of mmagnus
"""
from __future__ import print_function
from Bio.PDB import PDBParser
from scipy.spatial import distance
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import logging
import argparse
import numpy as np
import matplotlib.pyplot as plt

# logger
logger = logging.getLogger()
handler = logging.StreamHandler()
logger.addHandler(handler)



[docs]
def get_seq(alignfn, seqid):
    """Get seq from an alignment with gaps.

    Args:

       alignfn (str): a path to an alignment
       seqid   (str): seq id in an alignment

    Usage::

        >>> get_seq('test_data/ALN_OBJ1_OBJ2.fa', 'obj1')
        SeqRecord(seq=SeqRecord(seq=Seq('GUUCAG-------------------UGAC-', SingleLetterAlphabet()), id='obj1', name='obj1', description='obj1', dbxrefs=[]), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[])

    Returns:
         SeqRecord
    """
    # alignment = AlignIO.read(alignfn, 'fasta')
    alignment = SeqIO.index(alignfn, 'fasta')
    # print SeqRecord(alignment[seqid])
    sequence = SeqRecord(alignment[seqid])
    return sequence




[docs]
def open_pdb(pdbfn):
    """Open pdb with Biopython.

    Args:
       pdbfn1 (str): a path to a pdb structure

    Returns:
       PDB Biopython object: with a pdb structure

    """
    parser = PDBParser()
    return parser.get_structure('', pdbfn)




[docs]
def find_core(seq_with_gaps1, seq_with_gaps2):
    """.

    Args:
        seq_with_gaps1 (str): a sequence 1 from the alignment
        seq_with_gaps1 (str): a sequence 2 from the alignment

    Usage::

        >>> find_core('GUUCAG-------------------UGAC-', 'CUUCGCAGCCAUUGCACUCCGGCUGCGAUG')
        'xxxxxx-------------------xxxx-'


    Returns:
        core="xxxxxx-------------------xxxx-"

    """
    core = "".join(["x" if (a != '-' and b != '-') else "-" for (a, b)
                    in zip(seq_with_gaps1, seq_with_gaps2)])
    return core




[docs]
def map_coords_atom(structure):
    """.

        Args:
        structure (pdb): PDB Biopython object: with a pdb structure

        Returns:
            struct1dict: a list of coords for atoms
            structure1realNumber: a list of residues
        """

    resNumber = 0
    struct1dict = {}
    structure1realNumbers = {}

    for res in structure.get_residues():
        # print res
        for atom in res:
            name, coord = atom.name.strip(), atom.coord
            # G C5' [-15.50800037  -7.05600023  13.91800022]
            if name == atomToCompare:
                # print name, coord
                struct1dict[resNumber] = coord
                structure1realNumbers[resNumber] = res.get_full_id()[3][1]

        resNumber += 1
    return struct1dict,  structure1realNumbers




[docs]
def get_parser():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-v", "--verbose", help="increase output verbosity",
                        action="store_true")
    parser.add_argument("seqid1", help="seq1 id in the alignemnt")
    parser.add_argument("seqid2", help="seq2 id in the alignemnt")
    parser.add_argument("alignfn", help="alignemnt in the Fasta format")
    parser.add_argument("pdbfn1", help="pdb file1")
    parser.add_argument("pdbfn2", help="pdb file2")
    return parser



# main
if __name__ == '__main__':
    import doctest
    doctest.testmod()

    args = get_parser().parse_args()

    if args.verbose:
        logger.setLevel(logging.INFO)

    # if not args.outfn:
        # args.outfn = args.pdbfn.replace('.pdb', '_out.pdb')

    seq_with_gaps1 = get_seq(args.alignfn, args.seqid1)
    seq_with_gaps2 = get_seq(args.alignfn, args.seqid2)
    pdb1 = open_pdb(args.pdbfn1)
    pdb2 = open_pdb(args.pdbfn2)
    core = find_core(seq_with_gaps1, seq_with_gaps2)
    atomToCompare = "C4'"
    sep = '\t'

    structure1 = pdb1
    structure2 = pdb2

    struct1dict, structure1realNumbers = map_coords_atom(pdb1)
    struct2dict, structure2realNumbers = map_coords_atom(pdb2)

    stats = []
    stats.append(["res1", "res2", "distance [A]"])

    resNumber = 0
    seq1number = -1
    seq2number = -1

    for char in core:
        # local sequence numbering
        if seq_with_gaps1[resNumber] != '-':
            seq1number += 1
            # print "seq1", seq1number, seq1[resNumber]
        if seq_with_gaps2[resNumber] != '-':
            seq2number += 1
            # print "seq2", seq2number, seq2[resNumber]

        # alignment checking (iksy)
        if char == 'x':
            vect1 = struct1dict[seq1number]
            vect2 = struct2dict[seq2number]
            stats.append([str(structure1realNumbers[seq1number]),
                          str(structure2realNumbers[seq2number]),
                          str(distance.euclidean(vect1, vect2))])
            # print vect1,vect2

        resNumber += 1

    # struc = renumber(seq_with_gaps, pdb, args.residue_index_start)
    # write_struc(struc, args.outfn)
    list_res=[]
    for i in stats:
        print(sep.join(i))
        table=(sep.join(i))
        list_res.append(i)
    #print (list_res)
    res_matrix = np.array(list_res[1:])

    '''
    Creating a plot

    '''

    new_resi = list_res[1:]
    new_resis = []

    for i in new_resi:
        #print (j)
        new_resis.append(str(i[0]) + '/' + str(i[1]))


    #print (new_resis)

    list2_matrix= new_resis

    list2_matrix1 = map(float, list(res_matrix[:,2]))
    #print (list2_matrix1)

    plt.bar(list2_matrix,list2_matrix1,facecolor='pink' )

    plt.suptitle('Distance between C4 atoms of residues')
    plt.ylabel("distance [A]")
    plt.xlabel('Nr of residue')
    plt.show()