Source code for rna_tools.tools.rna_rosetta.rna_rosetta_min

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""rna_rosetta_min.py - a script to do minimization

The script counts the structures in the given silent file and does the maths (about 1/6 of them is taken for minimization).

Job names will be the name of your silent file preceded by ~, e.g. ``~tha``.

http://www.sciencedirect.com/science/article/pii/S0076687914000524 ::

    ade$ rna_rosetta_cluster.py ade.out

The first number states how many processors to use for the run, while the second number is 1/6 the total number of previously generated FARNA models. If you are running on a supercomputer that only allows specific multiples of processors, use an appropriate number for the first input.::

    rosetta_submit.py min_cmdline min_out 1 24

rosetta_submit.py min_cmdline min_out [1] [16] The first number states how many processors to use for each line in min_cmdline. Here, enter 1 for the first input so that the total number of processors used will be equal to the number of processors entered with the "-proc" flag in command line [12], above. The second number states the maximum time each job will be allowed to run (walltime). Start the run with the appropriate command listed by the output above (e.g., source qsubMPI for the Stampede cluster).

E.g. for a 20k silent file, 1/6 will be minimized = 3.3k::

    parallel_min_setup.py -silent rp21cr62.out -tag rp21cr62_min  -proc 200 -nstruct 3200 -out_folder mo -out_script MINIMIZE " -ignore_zero_occupancy false "
    rosetta_submit.py MINIMIZE mo 1 100 m

    [peyote2] rp21 easy_cat.py mo
    Catting into:  rp21_min.out ... from 200 primary files. Found 3200  decoys.

    # on 200 cpus it took around ~30min
"""
from __future__ import print_function
import logging

# Configure the root logger once, at import time: messages of level INFO and
# above are written to ``rna_rosetta_min.log`` (a relative path, so the file
# lands in the current working directory), each prefixed with a timestamp and
# the level name.
logging.basicConfig(filename='rna_rosetta_min.log', level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")

import argparse
import os
import glob
import subprocess
import math
import logging
import shutil
import re



[docs]
def get_no_structures(file):
    """Get a number of structures in a silent file.

    Every line of ``file`` that contains ``SCORE`` is counted and one is
    subtracted from the total (presumably for the column-header ``SCORE``
    line of the silent file).

    The file is read directly in Python instead of through a shell pipeline,
    so file names with spaces or shell metacharacters are handled and no
    external tools are required.

    Args:
        file (str): path to the silent file.

    Returns:
        int: number of ``SCORE`` lines minus one. This is ``-1`` when the file
        has no ``SCORE`` line at all or when it cannot be read (in the latter
        case the error is printed).
    """
    try:
        # Binary mode: a plain byte search needs no decoding, so unexpected
        # characters in the file cannot raise a UnicodeDecodeError.
        with open(file, 'rb') as handle:
            score_lines = sum(1 for line in handle if b'SCORE' in line)
    except (IOError, OSError) as error:
        # An unreadable file is reported and yields -1 rather than raising,
        # so callers that rely on getting a number back keep working.
        print(error)
        return -1
    return score_lines - 1




[docs]
def min(silent_file, take_n, cpus, go):
    """Run parallel_min_setup (to MINIMIZE file), rosetta_submit.py, and qsubMINI.

    Fix on the way, qsub files::

        -out:file:silent mo/0/mo/123/tha_min.out -> -out:file:silent mo/123/tha_min.out

    I don't know why mo/0/ is there. It might be because of my changes in rosetta_submit.py (?).

    Steps:

    1. ``parallel_min_setup.py`` is called through the shell and asked to write
       the ``MINIMIZE`` script (output folder ``mo``).
    2. ``rosetta_submit.py MINIMIZE mo 1 100 m`` is called through the shell.
    3. Every file in ``qsub_files/`` is rewritten in place: the extra ``mo/0/``
       is removed from the ``-out:file:silent`` path and the job name
       ``mm0`` is replaced with ``~<tag>_<index>``.
    4. If ``go`` is true, ``./qsubMINI`` is made executable and started.

    Args:
        silent_file (str): name of the silent file, e.g. ``tha.out``. Every
            ``.out`` in it is removed to build the tag and the job names.
        take_n (int): number of structures to minimize (passed as ``-nstruct``).
        cpus (int): number of processors (passed as ``-proc``).
        go (bool): if true, run ``./qsubMINI`` at the end.

    Raises:
        OSError: if ``qsub_files`` cannot be listed or one of its entries
            cannot be read or written.

    Note:
        The name shadows the builtin ``min`` inside this module; it is kept
        because callers use it.
    """
    # Historical notes on flags that were tried:
    #   -skip coord_constraints
    #   -params_file <file.params>
    #     ERROR:  -params_file not supported in rna_minimize anymore.
    #     ERROR:: Exit from: src/apps/public/farna/rna_minimize.cc line: 156
    #   -cst_fa_file

    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.info('cpun %i', cpus)

    # NOTE(review): replace() removes every '.out' in the name, not only a
    # trailing suffix (e.g. 'my.output.out' -> 'myput'); kept as-is so that
    # existing tags and job names do not change.
    tag = silent_file.replace('.out', '')

    # parallel_min_setup
    cmd = ("parallel_min_setup.py -silent " + silent_file
           + " -tag " + tag + '_min  -proc ' + str(cpus)
           + ' -nstruct ' + str(take_n)
           + ' -out_folder mo -out_script MINIMIZE "'
           + ' -ignore_zero_occupancy false "')
    print(cmd)
    logging.info(cmd)
    os.system(cmd)

    # rosetta_submit
    cmd = "rosetta_submit.py MINIMIZE mo 1 100 m"
    print(cmd)
    logging.info(cmd)
    os.system(cmd)

    # fixing qsub files
    qsub_dir = "qsub_files"
    job_prefix = '#$ -N ~' + tag.strip() + '_'  # to get ~rp06bx571_186

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # index that ends up in each job name reproducible between runs.
    for index, name in enumerate(sorted(os.listdir(qsub_dir))):
        path = os.path.join(qsub_dir, name)

        # `with` closes the handle even if reading fails.
        with open(path) as handle:
            txt = handle.read()

        txt = txt.replace('-out:file:silent mo/0/', '-out:file:silent ')
        # fix naming of my jobs
        txt = txt.replace('#$ -N mm0', job_prefix + str(index))

        # Write back through a context manager so the data is flushed and
        # the handle closed before the file is used by qsub.
        with open(path, 'w') as handle:
            handle.write(txt)

    if go:
        # run qsubMINI
        logging.info('qsubMINI')
        os.system('chmod +x ./qsubMINI')
        os.system('./qsubMINI')




[docs]
def get_parser():
    """Build the command-line parser of the script.

    The module docstring is used verbatim as the description
    (``RawDescriptionHelpFormatter`` keeps its line breaks).

    Returns:
        argparse.ArgumentParser: parser with the positional ``file`` argument
        and the options ``-g/--go`` (flag) and ``-c/--cpus`` (int, default 200).
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('file', help='ade.out')
    parser.add_argument('-g', '--go', action='store_true',
                        help='run ./qsubMINI once the job files are prepared')
    # type=int: a value given on the command line has the same type as the
    # default, and a non-numeric value is rejected by argparse with a usage
    # message instead of failing later with a traceback.
    parser.add_argument('-c', '--cpus', help='default: 200', default=200, type=int)
    return parser




[docs]
def run():
    """Command-line entry point.

    Parses the arguments, counts the structures in the given silent file,
    prints that number and starts the minimization setup for a fraction of
    them.
    """
    options = get_parser().parse_args()

    n_structures = get_no_structures(options.file)
    print('# structures:', n_structures)

    # 0.16 is the factor used for "1/6" of the models (20k -> 3200).
    n_to_minimize = int(n_structures * 0.16)
    n_cpus = int(options.cpus)
    min(options.file, n_to_minimize, n_cpus, options.go)



# main: start the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    run()