#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""rna_calc_rmsd.py - calculate RMSDs of structures to the target
If you still have problem with various number of atoms, check out this issue: get_rnapuzzle_ready: how to deal with `Alternate location indicator (https://github.com/mmagnus/rna-pdb-tools/issues/30).
The program is using (https://github.com/charnley/rmsd).
Example #1::
$ rna_calc_rmsd.py -t 6_0_solution_4GXY_rpr.pdb --model-selection=A:1-17+24-110+115-168 *.pdb
rmsd_calc_rmsd_to_target
--------------------------------------------------------------------------------
method: all-atom-built-in
# of models: 35
6_0_solution_4GXY_rpr.pdb 0.0 3409
6_Blanchet_1_rpr.pdb 22.31 3409
6_Blanchet_2_rpr.pdb 21.76 3409
6_Blanchet_3_rpr.pdb 21.32 3409
6_Blanchet_4_rpr.pdb 22.22 3409
6_Blanchet_5_rpr.pdb 24.17 3409
6_Blanchet_6_rpr.pdb 23.28 3409
6_Blanchet_7_rpr.pdb 22.26 3409
6_Bujnicki_1_rpr.pdb 36.95 3409
6_Bujnicki_2_rpr.pdb 30.9 3409
6_Bujnicki_3_rpr.pdb 32.1 3409
6_Bujnicki_4_rpr.pdb 32.04 3409
...
Example #2::
time rmsd_calc_to_target.py
-t 5k7c_clean_onechain_renumber_as_puzzle_srr.pdb
--target-selection A:1-48+52-63
--model-selection A:1-48+52-63
--target-ignore-selection A/57/O2\\'
clusters/*_AA.pdb
rmsd_calc_rmsd_to_target
--------------------------------------------------------------------------------
target_selection: A:1-48+52-63
model_selection: A:1-48+52-63
target_ignore_selection: A/57/O2'
model_ignore_selection:
# of models: 801
fn,rmsd_all
pistol_thrs0.50A_clust01-000001_AA.pdb,7.596
pistol_thrs0.50A_clust02-000001_AA.pdb,7.766
pistol_thrs0.50A_clust03-000001_AA.pdb,18.171
[..]
pistol_thrs0.50A_clust799-000001_AA.pdb,5.356
pistol_thrs0.50A_clust800-000001_AA.pdb,15.282
pistol_thrs0.50A_clust801-000001_AA.pdb,16.339
# of atoms used: 1237
csv was created! rmsds.csv
rmsd_calc_to_target.py -t 5k7c_clean_onechain_renumber_as_puzzle_srr.pdb
37.93s user 1.07s system 87% cpu 44.650 total
Works also for multiple chains:
rna_calc_rmsd.py --model-selection='A:52+53+59+60+61+80+B:21+22+23' --target-selection='A:52+53+59+60+61+80+B:21+22+23' -t yC_5LJ3_U2U6_core_mdrFx_onlyTriplex_rpr.pdb yC_5LJ3_U2U6_core_mdrFx_addh_MD_1_rpr_rchain.pdb
"""
from __future__ import print_function
from rna_tools.tools.rna_calc_rmsd.lib.rmsd.calculate_rmsd import *
import sys
from rna_tools.tools.extra_functions.select_fragment import select_pdb_fragment_pymol_style, select_pdb_fragment
import argparse
import sys
import math
import glob
import re
import os
[docs]def get_rna_models_from_dir(files):
"""
:param models: a list of filenames
Example of the list::
['test_data/rp17/2_restr1_Michal1.pdb_clean.pdb', 'test_data/rp17/2a_nonrestr2_Michal1.pdb_clean.pdb',
'test_data/rp17/3_nonrestr1_Michal1.pdb_clean.pdb', 'test_data/rp17/5_restr1_Michal3.pdb_clean.pdb']"""
models = []
#if not os.path.exists(directory):
# raise Exception('Dir does not exist! ', directory)
#files = glob.glob(directory + "/*.pdb")
files_sorted = sort_nicely(files)
for f in files_sorted:
models.append(f)
return models
[docs]def sort_nicely( l ):
""" Sort the given list in the way that humans expect.
http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
"""
convert = lambda text: int(text) if text.isdigit() else text
alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
l.sort( key=alphanum_key )
return l
[docs]def calc_rmsd_pymol(pdb1, pdb2, method):
"""Calculate rmsd using PyMOL. Two methods are available: align and fit
See:
- Align: <http://www.pymolwiki.org/index.php/Align>
- Fit: <http://www.pymolwiki.org/index.php/Fit>
Align can return a list with 7 items:
- RMSD after refinement
- Number of aligned atoms after refinement
- Number of refinement cycles
- RMSD before refinement
- Number of aligned atoms before refinement
- Raw alignment score
- Number of residues aligned
in this version of function, the function returns `RMSD before refinement`.
Install on OSX: ``brew install brewsci/bio/pymol`` or get
If you have a problem::
Match-Error: unable to open matrix file '/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/data/pymol/matrices/BLOSUM62'.
then find BLOSUM62, e.g.::
mdfind -name BLOSUM62 | grep pymol
/Users/magnus/miniconda2/envs/py37/lib/python3.7/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62
/usr/local/Cellar/pymol/2.4.0_3/libexec/lib/python3.9/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62
/Users/magnus/miniconda2/pkgs/pymol-2.4.2-py37h06d7bae_0/share/pymol/data/pymol/matrices/BLOSUM62
/Users/magnus/work/opt/pymol-open-source/data/pymol/matrices/BLOSUM62
and then define ``PYMOL_DATA`` in your .bashrc/.zshrc, e.g.::
export PYMOL_DATA="/Users/magnus/work/opt/pymol-open-source/data/pymol"
"""
try:
import __main__
__main__.pymol_argv = ['pymol', '-qc']
import pymol # import cmd, finish_launching
pymol.finish_launching()
except ImportError:
print('calc_rmsd_pymol: you need to have installed PyMOL')
sys.exit(0) # no error
pymol.cmd.reinitialize()
pymol.cmd.delete('all')
pymol.cmd.load(pdb1, 's1')
pymol.cmd.load(pdb2, 's2')
if method == 'align':
# experiments with align <https://pymolwiki.org/index.php/Align>
# quiet = 0/1: suppress output {default: 0 in command mode, 1 in API}
# (4.130036354064941, 60, 3, 4.813207626342773, 64, 30.0, 3)
values = pymol.cmd.align('s1', 's2',quiet=1, object='aln')
return values[0], values[3] # (,#0) #, pymol.cmd.align('s1','s2')[4])
#raw_aln = pymol.cmd.get_raw_alignment('aln')
#print raw_aln
#for idx1, idx2 in raw_aln:
# print '%s`%d -> %s`%d' % tuple(idx1 + idx2)
#pymol.cmd.save('aln.aln', 'aln')
if method == 'fit':
return (pymol.cmd.fit('s1', 's2'), 'fit')
[docs]def calc_rmsd(a, b, target_selection, target_ignore_selection, model_selection, model_ignore_selection, way, verbose):
"""
Calculate RMSD between two XYZ files
by: Jimmy Charnley Kromann <jimmy@charnley.dk> and Lars Andersen Bratholm <larsbratholm@gmail.com>
project: https://github.com/charnley/rmsd
license: https://github.com/charnley/rmsd/blob/master/LICENSE
a is model
b is target
:params: a = filename of structure a
:params: b = filename of structure b
:return: rmsd, number of atoms
"""
if verbose: print('in:', a)
atomsP, P = get_coordinates(a, model_selection, model_ignore_selection, 'pdb', True, way)
atomsQ, Q = get_coordinates(b, target_selection,target_ignore_selection, 'pdb', True, way)
if verbose:
print(atomsP, P)
print(atomsQ, Q)
if atomsQ != atomsP:
print('Error: number of atoms is not equal target (' + b + '):' + str(atomsQ) + ' vs model (' + a + '):' + str(atomsP))
return (-1,0) # skip this RNA
# Calculate 'dumb' RMSD
normal_rmsd = rmsd(P, Q)
# Create the centroid of P and Q which is the geometric center of a
# N-dimensional region and translate P and Q onto that center.
# http://en.wikipedia.org/wiki/Centroid
Pc = centroid(P)
Qc = centroid(Q)
P -= Pc
Q -= Qc
if False:
V = rotate(P, Q)
V += Qc
write_coordinates(atomsP, V)
quit()
return round(kabsch_rmsd(P, Q),2), atomsP
[docs]def get_parser():
import argparse
class SmartFormatter(argparse.HelpFormatter):
def _split_lines(self, text, width):
if text.startswith('R|'):
return text[2:].splitlines()
# this is the RawTextHelpFormatter._split_lines
return argparse.HelpFormatter._split_lines(self, text, width)
parser = argparse.ArgumentParser(description=__doc__, formatter_class=SmartFormatter)#formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-t',"--target-fn",
default='', required = True,
help="pdb file")
parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'', default='')
parser.add_argument("--target-selection",
default='',
help="selection, e.g. A:10-16+20, where #16 residue is included")
parser.add_argument("--target-ignore-selection",
default='',
help="A/10/O2\'")
parser.add_argument("--model-selection",
default='',
help="selection, e.g. A:10-16+20, where #16 residue is included")
parser.add_argument("--model-ignore-selection",
default='',
help="A/10/O2\'")
parser.add_argument('-m', "--method",
default='all-atom-built-in',
help="align, fit")
parser.add_argument('-o', "--rmsds-fn",
default='rmsds.csv',
help="ouput, matrix")
parser.add_argument("-v", "--verbose", action="store_true",
help="verbose")
parser.add_argument('-pr', '--print-results',
action="store_true")
parser.add_argument('-sr', '--sort-results',
action="store_true")
parser.add_argument('-pp', '--print-progress',
default=False,
action="store_true")
parser.add_argument('--way', help="""R|c1p = C1'
backbone = P OP1 OP2 O5' C5' C4' C3' O3'
po = P OP1 OP2
no-backbone = all - po
bases, backbone+sugar, sugar
pooo = P OP1 OP2 O5'
alpha = P OP1 OP2 O5' C5'
""", default='all')
parser.add_argument("--name-rmsd-column", help="default: fn,rmsd, with this cols will be fn,<name-rmsd-column>")
parser.add_argument("--target-column-name", action="store_true",
help="")
parser.add_argument('files', help='files', nargs='+')
return parser
# main
if __name__ == '__main__':
parser = get_parser()
args = parser.parse_args()
input_files = args.files # opts.input_dir
tmp = []
if args.ignore_files:
for f in input_files:
if args.ignore_files in f:
continue
tmp.append(f)
input_files = tmp
rmsds_fn = args.rmsds_fn
target_fn = args.target_fn
method = args.method
print('method:', method)
target_selection = select_pdb_fragment(args.target_selection)
model_selection = select_pdb_fragment(args.model_selection)
if target_selection:
if args.verbose:
print(' target_selection: #', args.target_selection, target_selection)
ts = target_selection
print(ts)
resides_in_total = 0
for i in target_selection:
print((i, len(ts[i]))) # chain string, a list of residues
resides_in_total += len(ts[i])
print('in total:', resides_in_total)
if model_selection:
if args.verbose:
print(' model_selection: ', len(model_selection), args.model_selection, model_selection)
resides_in_total = 0
for i in model_selection:
print(i, len(model_selection[i])) # chain string, a list of residues
resides_in_total += len(model_selection[i])
print('in total:', resides_in_total)
if args.target_ignore_selection:
target_ignore_selection = select_pdb_fragment_pymol_style(args.target_ignore_selection, args.verbose)
else:
target_ignore_selection = None
if args.model_ignore_selection:
model_ignore_selection = select_pdb_fragment_pymol_style(args.model_ignore_selection, args.verbose)
else:
model_ignore_selection = None
if args.target_ignore_selection:
if args.verbose: print(' target_ignore_selection: ', args.target_ignore_selection)
if args.model_ignore_selection:
if args.verbose: print(' model_ignore_selection: ', args.model_ignore_selection)
models = get_rna_models_from_dir(input_files)
print('target:', target_fn)
print('of models:', len(models))
f = open(rmsds_fn, 'w')
#t = 'target:' + os.path.basename(target_fn) + ' , rmsd_all\n'
if args.name_rmsd_column:
t = 'fn,' + args.name_rmsd_column + '\n'
elif args.target_column_name:
t = 'fn,' + os.path.basename(args.target_fn) + '\n'
else:
t = 'fn,rmsd_all\n'
c = 1
for r1 in models:
if method == 'align' or method == 'fit':
rmsd_curr, atoms = calc_rmsd_pymol(r1, target_fn, method, args.verbose)
else:
rmsd_curr, atoms = calc_rmsd(r1, target_fn, target_selection, target_ignore_selection, model_selection, model_ignore_selection, args.way, args.verbose)
r1_basename = os.path.basename(r1)
if args.print_progress: print(r1_basename, rmsd_curr, atoms)
t += r1_basename + ',' + str(round(rmsd_curr,3)) + ' '
c += 1
t += '\n'
f.write(t)
f.close()
print('number of atoms used:', atoms)
try:
import pandas as pdx
pd.set_option('display.max_rows', 1000)
except:
print(t.strip()) # matrix
sys.exit(0)
df = pd.read_csv(rmsds_fn)
df = df.round(2)
if args.sort_results:
df = df.sort_values('rmsd_all', ascending=True)
if args.print_results:
print(df)
df.to_csv(rmsds_fn, sep=',', index=False) # easy to set \t here!
# print('# csv was created! ', rmsds_fn)