Source code for rna_tools.tools.rna_rosetta.rna_rosetta_check_progress

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""rna_rosetta_check_progress.py - check progress for many simulations of Rosetta

Example::

    [peyote2] rosetta_jobs rna_rosetta_check_progress.py .
             jobs  #curr  #todo  #decoys done
    0  ./rp17s223    200      0      407  [ ]
    1   ./rp17hcf      0      0        0  [ ]
    # curr  232 #todo  0

"""
from __future__ import print_function
import subprocess

import os
import pandas as pd
import glob
import sys
import argparse

try:
    from rna_tools.rna_tools_config import RNA_ROSETTA_RUN_ROOT_DIR_MODELING, RNA_ROSETTA_NSTRUC
except:
    print('Set up RNA_ROSETTA_RUN_ROOT_DIR_MODELING in rna_tools.rna_tools_config_local.py')

from rna_tools.rna_tools_config import EASY_CAT_PATH
# print('Set up EASY_CAT_PATH in rpt_config_local.py')
print(EASY_CAT_PATH)


[docs] def run_cmd(cmd): o = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out = o.stdout.read().strip().decode() err = o.stderr.read().strip().decode() return out, err
[docs] def get_parser(): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('dir', default=RNA_ROSETTA_RUN_ROOT_DIR_MODELING, help="""directory with rosetta runs, define by RNA_ROSETTA_RUN_ROOT_DIR_MODELING right now: \n""" + RNA_ROSETTA_RUN_ROOT_DIR_MODELING) parser.add_argument('-v', '--verbose', action='store_true', help="be verbose") parser.add_argument('-m', '--min-only', action='store_true', help="check only for mo folder") parser.add_argument('-s', '--select', help="select for analysis only jobs with this phrase, .e.g., evoseq_", default='') parser.add_argument('-k', '--kill', action='store_true', help="""kill (qdel) jobs if your reach limit (nstruc) of structure that you want, right now is %i structures""" % RNA_ROSETTA_NSTRUC) return parser
if __name__ == '__main__': args = get_parser().parse_args() v = args.verbose # jobs = [x.replace('.fa','') for x in glob.glob( '*.fa')] # sys.argv[1] + # sys.argv[1] + # print jobs jobs = [x for x in glob.glob(args.dir + '/*')] if v: print(jobs) curr_path = os.getcwd() d = {} d['#decoys'] = [] d['jobs'] = [] d['#curr'] = [] d['#todo'] = [] d['done'] = [] d['progress'] = [] for j in jobs: if args.select: if not args.select in j: continue if v: print(j) d['jobs'].append(j) try: os.chdir(j) except: pass # OSError: [Errno 20] Not a directory: dirs = [] if not args.min_only: # Uff.. this is crazy. for c in ['_', '__', '___', 'x', 'y', 'z', 'X', 'Y', 'Z', 'o', 'out'] + ['r' + str(i) for i in range(0, 1000)]: if os.path.exists(c): dirs.append(c) # print ' ', j, '', dirs cmd = EASY_CAT_PATH + ' ' + ' '.join(dirs) if v: print('cmd: %s' % cmd) out, err = run_cmd(cmd) if out.strip(): if len(j) <= 5: if v: print(out + '\t\t', end='') else: if v: print(out + '\t', end='') no_decoys = int(out.split()[-2]) else: no_decoys = 0 if v: print('#', no_decoys) else: # # of minimized RNA_ROSETTA_NSTRUC = 1600 # change expected !!!! pretty ugly for c in ['mo']: if os.path.exists(c): dirs.append(c) # print ' ', j, '', dirs cmd = EASY_CAT_PATH + ' ' + ' '.join(dirs) if v: print(cmd) out, err = run_cmd(cmd) if out.strip(): if len(j) <= 5: if v: print(out + '\t\t', end='') else: if v: print(out + '\t', end='') no_decoys = int(out.split()[-2]) else: no_decoys = 0 if v: print('#', no_decoys) # check current running, only 6 char are taken from dir name # /home/magnus/rosetta_jobs/rp17s223 -> rp17s2199, rp17s2185 cmd = 'qstat | grep ' + \ os.path.basename(j)[:6] + ' | grep " r " | wc -l ' if v: print(cmd) out, err = run_cmd(cmd) if v: print('@cluster #curr ', out, end="") curr = int(out) d['#curr'].append(curr) # check todo out, err = run_cmd('qstat | grep ' + os.path.basename(j) [:6] + ' | grep " qw " | wc -l ') if v: print('#todo ', out, end="") todo = int(out) d['#todo'].append(todo) d['#decoys'].append(no_decoys) if no_decoys >= RNA_ROSETTA_NSTRUC: if v: print('# @cluster:', out, ' < ............... OK') d['done'].append('[x]') if args.kill: cmd = 'qstat | grep ' + \ os.path.basename(j)[:6] + \ " | awk '{print $1}' | xargs qdel " print(cmd) os.system(cmd) else: d['done'].append('[ ]') if v: print('# @cluster:', out) # cmd = 'kill ' # print cmd d['progress'].append(round((float(no_decoys) / 10000) * 100, 2)) os.chdir(curr_path) df = pd.DataFrame(d, columns=['jobs', '#curr', '#todo', '#decoys', 'done', 'progress']) print(df) out, err = run_cmd('qstat | grep magnus | grep " r " | wc -l ') print('#curr ', out, end=" ") out, err = run_cmd('qstat | grep magnus | grep " qw " | wc -l ') print('#todo ', out)