# Source code for rna_tools.tools.mq.FARNA.FARNA

#*-* coding: utf-8 *-*

"""Wrapper for ROSETTA software for structure prediction of small 
RNA sequences"""

import os, shutil, re
import subprocess

import os
from shutil import copyfile
from rna_tools.tools.mq.lib.wrappers.SubprocessUtils import run_command
from rna_tools.tools.pdb_formatix.PDBFile import PDBFile#resname_check_and_3to1, set_residues_bfactor
from rna_tools.tools.mq.lib.wrappers.base_wrappers import ProgramWrapper, WrapperError
from rna_tools.tools.pdb_formatix.RosettaUtils import RosettaPDBFile
from rna_tools.tools.pdb_formatix.rebuilder import check_and_rebuild
from rna_tools.rna_tools_config import FARNA_PATH, FARNA_DB_PATH, FARNA_LORES



# [docs]
class FARNA(ProgramWrapper):
    """
    Wrapper class for running ROSETTA scoring function automatically.

    A PDB file is prepared inside the wrapper's sandbox directory, the
    ROSETTA binary configured as ``FARNA_PATH`` is run on it, and the
    scores are read back from the ``SCORE.out`` file it writes.
    """
    # Short name of the wrapped program.
    program_name = 'farna'  
    # Path to the ROSETTA binary; sandbox() symlinks it into the sandbox.
    src_bin = FARNA_PATH
    # ROSETTA database directory, passed to the binary as ``-database``.
    db_path = FARNA_DB_PATH
    # Name of the FASTA file written into the sandbox by _prepare_files().
    input_fn = 'seq.fasta'
    # Placeholder; assigned by _prepare_files().
    input_file = ''
    # Placeholder; not assigned anywhere in this class.
    best_energy = ''
    # Name given to the symlinked binary inside the sandbox.
    executable = 'rna_minimize'
    def __init__(self, sequence='test', seq_name='test', job_id=None):
        """Record the starting directory and initialise the base wrapper.

        Arguments:
            * sequence = sequence handed to the base wrapper
            * seq_name = sequence name handed to the base wrapper
            * job_id = optional job identifier handed to the base wrapper
        """
        try:
            cwd = os.getcwd()
        except OSError:
            # The working directory no longer exists (e.g. it was deleted);
            # start_dir is simply left unset in that case.
            pass
        else:
            self.start_dir = cwd
        super().__init__(sequence, seq_name, job_id=job_id)

    def _prepare_stderr_stdout(self):
        """Open ``stdout.txt`` and ``stderr.txt`` in ``self.path`` for writing.

        The paths are stored in ``self.output_file`` / ``self.error_file``
        and the open handles in ``self.stdout`` / ``self.stderr``.
        """
        # (path attribute, handle attribute, file name); stdout is set up first.
        log_targets = (
            ('output_file', 'stdout', 'stdout.txt'),
            ('error_file', 'stderr', 'stderr.txt'),
        )
        for path_attr, handle_attr, file_name in log_targets:
            log_path = os.path.join(self.path, file_name)
            setattr(self, path_attr, log_path)
            setattr(self, handle_attr, open(log_path, 'w'))

    def _prepare_files(self):
        """Write the FASTA input file into the sandbox and open the log files.

        The sequence is lower-cased and written below the fixed header
        ``>seq.fasta`` to ``<sandbox_dir>/<input_fn>``.  ``self.input_file``
        is set to the path of that file, then the stdout/stderr log files
        are opened via ``_prepare_stderr_stdout()``.
        """
        input_path = os.path.join(self.sandbox_dir, self.input_fn)
        # The context manager guarantees the data is flushed and the handle
        # closed before anything else reads the file.
        with open(input_path, 'w') as fasta:
            fasta.write('>seq.fasta\n' + str(self.sequence).lower())
        # Keep the path (not the character count returned by write()) so the
        # attribute actually identifies the input file.
        self.input_file = input_path
        self._prepare_stderr_stdout()
            

# [docs]
    def sandbox(self):
        """Link the ROSETTA binary into the sandbox directory.

        A symlink named ``self.executable`` is created inside
        ``self.sandbox_dir`` and points at ``self.src_bin``.
        """
        link_path = os.sep.join((self.sandbox_dir, self.executable))
        os.symlink(self.src_bin, link_path)
                               

# [docs]
    def run(self, pdb_file, hires, verbose=False, system=False):#, global_energy_score=True):
        """Compute FARNA potential for a single file

        The input is copied into the sandbox as ``tmp.pdb`` with its ``END``
        records rewritten to ``TER``, passed through ``check_and_rebuild``
        to produce ``query.pdb``, made ROSETTA-ready, and then scored by the
        ROSETTA binary.  The scores are read back from ``SCORE.out``.

        Arguments:
            * pdb_file = path to pdb file
            * hires = if ``hires == True`` no extra scoring flags are passed;
              otherwise ``-score:weights <FARNA_LORES> -minimize_rna`` is added
            * verbose = print the command line before running it
            * system = run the command with ``os.system()`` instead of
              capturing its output with ``subprocess.getoutput()``

        Output:
            * A list of the global scores as strings, in the order of the
              score columns of ``SCORE.out``, e.g::

              ['-21.721', '-0.899', '-20.961', '-84.498', '-16.574', '-180.939', '11.549', '7.475', '-17.257', '-306.324', '0.0', '0.0', '17.503', '0.0']

        The full result dictionary stays available as ``self.result``.
        """
        sandbox = self.sandbox_dir
        tmp_path = sandbox + os.sep + 'tmp.pdb'
        query_path = sandbox + os.sep + 'query.pdb'

        # Normalise chain terminators: a trailing "TER ... END" collapses to
        # a single TER, and every remaining literal "END" becomes "TER".
        with open(pdb_file) as src:
            ftxt = src.read()
        ftxt = re.sub(r'TER\s+END\s+', 'TER', ftxt)
        ftxt = re.sub('END', 'TER', ftxt).strip()
        with open(tmp_path, 'w') as tmp:
            tmp.write(ftxt)

        if check_and_rebuild(tmp_path, query_path):
            self.pdb_fixes.append('rebuild_full_atom')
        rosetta_pdb = RosettaPDBFile(pdb_path=query_path)
        # get sequence from PDB file
        with open(sandbox + os.sep + 'query.fasta', 'w') as fasta:
            fasta.write(rosetta_pdb.get_fasta(lowercase=True))
        # create a ROSETTA ready PDB file
        rosetta_pdb.make_rna_rosetta_ready()
        rosetta_pdb.save(query_path)
        # NOTE(review): this replaces the list that 'rebuild_full_atom' may
        # have been appended to above -- kept as-is, confirm it is intended.
        self.pdb_fixes = rosetta_pdb.fixes

        self.flags = [sandbox + os.sep + self.executable]

        # Deliberately an equality test (not plain truthiness) so that
        # non-boolean truthy values keep selecting the low-resolution branch.
        if hires == True:
            minimize_cmd = ' '
        else:
            minimize_cmd = ' -score:weights ' + FARNA_LORES + ' -minimize_rna '

        cmd = ' '.join([FARNA_PATH, '-constant_seed -database', self.db_path,
                        minimize_cmd,
                        ' -ignore_zero_occupancy false ',
                        '-s', query_path,
                        '-out:file:silent', sandbox + os.sep + 'SCORE.out'])

        if verbose:
            print(cmd)
        self.log(cmd, 'debug')
        self.log('Running program')
        if system:
            # os.system() does not capture output; log the exit status so
            # that `out` is always defined for the debug log below.
            exit_status = os.system(cmd)
            out = 'os.system exit status: %s' % exit_status
        else:
            out = subprocess.getoutput(cmd)

        self.log('Run finished')
        self.log(out, 'debug')

        self.get_result()

        # Index 0 of every entry holds the global score.
        return [str(self.result[key][0]) for key in self.result]

        

# [docs]
    def get_result(self):
        """Parse the score file written during the ROSETTA run.

        ``SCORE.out`` is read from the sandbox and its ``REMARK`` lines are
        dropped.  Of the remaining lines, the second supplies the score
        names (first and last token skipped) and the third the global score
        values (first token skipped).

        Returns ``self.result``: a dictionary mapping every score name to a
        one-element list that holds the global score as a float.
        """
        score_path = self.sandbox_dir + os.sep + 'SCORE.out'
        with open(score_path) as score_file:
            content = score_file.read()

        rows = []
        for row in content.split('\n'):
            if row.startswith('REMARK'):
                continue
            rows.append(row)

        # header row: names of the different scores
        names = rows[1].split()[1:-1]
        # first data row: global scores
        values = rows[2].split()[1:]

        # zip() stops at the shorter sequence, so surplus trailing tokens
        # (such as the description column) are ignored.
        self.result = {name: [float(value)] for name, value in zip(names, values)}
        return self.result



# [docs]
    def mqap(self, pdb):
        r"Total weighted score:\s+(?P<ROSETTA_SCORE>[-\d.]+)"
        # The docstring is a regular expression with a named group
        # ROSETTA_SCORE; presumably it is read by the wrapper framework --
        # verify before rewording it.  It is a raw literal so that \s and \d
        # are not treated as (invalid) string escapes; the text is unchanged.
        # The method itself does nothing and returns None.
        pass



# [docs]
    def cleanup(self):
        """Delegate clean-up entirely to the base wrapper."""
        super().cleanup()



# main
if __name__ == '__main__':
    # Manual smoke test: score each file under ./test in both modes.
    # Inputs used previously: 'test.pdb', '1xjrA_M1.pdb',
    # '2pcw_1_2chains.pdb' (two chains).
    fns = ['3e5f_output4_01-000001_AA+ResnShift.pdb']

    # (constructor arguments, hires flag): first with hires False (mini
    # false), then with hires True (mini true).
    modes = (
        ((), False),
        (('', ''), True),
    )

    for fn in fns:
        path = 'test' + os.sep + fn
        print('processing %s' % path)

        for init_args, hires in modes:
            farna = FARNA(*init_args)
            try:
                result = farna.run(path, hires)
            except Exception:
                # Report failures of a run without hiding Ctrl-C or
                # SystemExit, which a bare ``except:`` would swallow.
                result = 'error'
            print(result)

    # NOTE: farna.cleanup() is not called here; presumably so the sandbox
    # can be inspected after the run -- confirm.