Source code for rna_tools.tools.rna_rosetta.rna_rosetta_min

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""rna_rosetta_min.py - a script to do minimization

The script takes the number of structures and the analyzed silent file and does the maths.

Job names will be the name of your silent file preceded by ``~``, e.g. ``~tha``.

http://www.sciencedirect.com/science/article/pii/S0076687914000524 ::

    ade$ rna_rosetta_cluster.py ade.out

The first number states how many processors to use for the run, while the second number is 1/6 the total number of previously generated FARNA models. If you are running on a supercomputer that only allows specific multiples of processors, use an appropriate number for the first input.::

    rosetta_submit.py min_cmdline min_out 1 24

rosetta_submit.py min_cmdline min_out [1] [16] The first number states how many processors to use for each line in min_cmdline. Here, enter 1 for the first input so that the total number of processors used will be equal to the number of processors entered with the "-proc" flag in command line [12], above. The second number states the maximum time each job will be allowed to run (walltime). Start the run with the appropriate command listed by the output above (e.g., source qsubMPI for the Stampede cluster).

E.g. for a 20k silent file, 1/6 will be minimized = 3.3k::

    parallel_min_setup.py -silent rp21cr62.out -tag rp21cr62_min  -proc 200 -nstruct 3200 -out_folder mo -out_script MINIMIZE " -ignore_zero_occupancy false "
    rosetta_submit.py MINIMIZE mo 1 100 m

    [peyote2] rp21 easy_cat.py mo
    Catting into:  rp21_min.out ... from 200 primary files. Found 3200  decoys.

    # on 200 cpus it took around ~30min
"""
from __future__ import print_function
import logging

# Configure the root logger as soon as the module is loaded: records of level
# INFO and above go to ``rna_rosetta_min.log`` (a relative path, so it is
# created in the current working directory), formatted as
# "<time> - <level> - <message>".
logging.basicConfig(filename='rna_rosetta_min.log', level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")

import argparse
import os
import glob
import subprocess
import math
import logging
import shutil
import re


def get_no_structures(file):
    """Get a number of structures in a silent file.

    Counts the lines of ``file`` that contain the text ``SCORE`` and subtracts
    one from that count (presumably to discount the ``SCORE`` header line of
    the silent file -- confirm against the silent-file format).

    The file is read directly instead of going through a
    ``cat | grep | wc`` shell pipeline, so file names with spaces or shell
    metacharacters are handled and no external tools are required.

    Args:
        file: path to the silent file (e.g. ``ade.out``).

    Returns:
        int: number of ``SCORE`` lines minus one. If the file cannot be read
        the error is printed and ``-1`` is returned, which is what the former
        shell pipeline produced in that situation.
    """
    n_score_lines = 0
    try:
        # Binary mode: no decoding is needed to look for an ASCII marker, and
        # it behaves the same under Python 2 and Python 3.
        with open(file, 'rb') as fh:
            for line in fh:
                if b'SCORE' in line:
                    n_score_lines += 1
    except (IOError, OSError) as err:
        # Keep the old best-effort behaviour: report the problem, don't raise.
        print(err)
    return n_score_lines - 1
def _run_shell(cmd):
    """Print and log ``cmd``, then execute it through ``os.system``."""
    print(cmd)
    logging.info(cmd)
    os.system(cmd)


def _fix_qsub_files(silent_file, qsub_dir="qsub_files"):
    """Rewrite every file in ``qsub_dir`` in place (output path and job name)."""
    job_prefix = '#$ -N ~' + silent_file.replace('.out', '').strip() + '_'
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # job-index numbering reproducible between runs.
    for index, name in enumerate(sorted(os.listdir(qsub_dir))):
        path = os.path.join(qsub_dir, name)
        with open(path) as fh:
            txt = fh.read()
        # drop the spurious leading "mo/0/" from the silent output path
        txt = txt.replace('-out:file:silent mo/0/', '-out:file:silent ')
        # fix naming of the jobs, to get e.g. ~rp06bx571_186
        txt = txt.replace('#$ -N mm0', job_prefix + str(index))
        with open(path, 'w') as fh:
            fh.write(txt)


def min(silent_file, take_n, cpus, go):
    """Run parallel_min_setup (to MINIMIZE file), rosetta_submit.py, and qsubMINI.

    Steps:

    1. ``parallel_min_setup.py`` is run on ``silent_file`` with
       ``-proc cpus``, ``-nstruct take_n``, output folder ``mo`` and output
       script ``MINIMIZE``.
    2. ``rosetta_submit.py MINIMIZE mo 1 100 m`` is run.
    3. Every file in ``qsub_files`` is fixed on the way::

           -out:file:silent mo/0/mo/123/tha_min.out -> -out:file:silent mo/123/tha_min.out

       (it is not known why ``mo/0/`` is there; it might be caused by local
       changes in rosetta_submit.py) and the job name ``mm0`` is replaced by
       ``~<silent file name without .out>_<index>``.
    4. If ``go`` is true, ``./qsubMINI`` is made executable and executed.

    Notes kept from earlier experiments: ``-params_file`` is not supported by
    rna_minimize anymore (ERROR exit from
    src/apps/public/farna/rna_minimize.cc line 156).

    Args:
        silent_file: name of the silent file, e.g. ``tha.out``.
        take_n: number of structures to minimize (``-nstruct``).
        cpus: number of processors (``-proc``).
        go: if true, submit the jobs by running ``./qsubMINI``.

    The shell commands are passed to ``os.system`` as plain strings, so
    ``silent_file`` must not contain characters that are special to the shell.
    """
    logging.info('cpun %i', cpus)

    # parallel_min_setup
    tag = silent_file.replace('.out', '') + '_min'
    _run_shell('parallel_min_setup.py -silent ' + silent_file
               + ' -tag ' + tag
               + ' -proc ' + str(cpus)
               + ' -nstruct ' + str(take_n)
               + ' -out_folder mo -out_script MINIMIZE'
               + ' " -ignore_zero_occupancy false "')

    # rosetta_submit
    _run_shell('rosetta_submit.py MINIMIZE mo 1 100 m')

    # fixing qsub files
    _fix_qsub_files(silent_file)

    if go:
        # run qsubMINI
        logging.info('qsubMINI')
        os.system('chmod +x ./qsubMINI')
        os.system('./qsubMINI')
def get_parser():
    """Build the command-line parser of the script.

    Arguments defined:

    - ``file``: positional, the silent file (e.g. ``ade.out``),
    - ``-g`` / ``--go``: flag, false unless given,
    - ``-c`` / ``--cpus``: number of cpus, parsed as an integer (default 200),
      so a non-numeric value is rejected by argparse with a usage message.

    The module docstring is used verbatim as the description.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('file', help='ade.out')
    parser.add_argument('-g', '--go', action='store_true',
                        help='run ./qsubMINI after the qsub files are prepared')
    parser.add_argument('-c', '--cpus', help='default: 200', default=200,
                        type=int)
    return parser
def run():
    """Entry point: parse the command line and start the minimization.

    Counts the structures in the given silent file, prints that count, and
    calls ``min`` for ``int(count * 0.16)`` of them (roughly 1/6) with the
    requested number of cpus and the ``--go`` flag.
    """
    options = get_parser().parse_args()

    n_structures = get_no_structures(options.file)
    print('# structures:', n_structures)

    n_to_minimize = int(n_structures * 0.16)  # 1/6
    n_cpus = int(options.cpus)
    min(options.file, n_to_minimize, n_cpus, options.go)
# main: only start the workflow when the file is executed as a script
if __name__ == '__main__':
    run()