Source code for rna_tools.Seq

#!/usr/bin/env python3
#-*- coding: utf-8 -*-
"""RNA Sequence with secondary structure prediction methods.

This tool takes a given sequence and returns the secondary structure prediction provided by 5 different tools: RNAfold, RNAsubopt, ipknot, contextfold and centroid_fold. You must have these tools installed. You don't have to install all tools if you want to use only one of the methods.

It's easy to add more methods of your choince to this class.

Installation
~~~~~~~~~~~~~
Depends on what tools you want to use, follow the instructions below.

ContextFold
^^^^^^^^^^^^^^^^^^^^^
https://www.cs.bgu.ac.il/~negevcb/contextfold/

needs Java. Try this on Ubuntu 14-04 https://askubuntu.com/questions/521145/how-to-install-oracle-java-on-ubuntu-14-04 Single chain only!

ViennaRNA
^^^^^^^^^^^^^^
https://www.tbi.univie.ac.at/RNA/

For OSX install from the binary Installer from the page.

ipknot OSX
^^^^^^^^^^^^^
https://github.com/satoken/homebrew-rnatools

If one encounters a problem::

    [mm] Desktop$ /usr/local/opt/bin/ipknot
    dyld: Library not loaded: /usr/local/opt/glpk/lib/libglpk.40.dylib
      Referenced from: /usr/local/opt/bin/ipknot
      Reason: image not found
    [1]    51654 abort      /usr/local/opt/bin/ipknot

the solution is::

     brew install glpk # on OSX

RNA Structure
^^^^^^^^^^^^^
http://rna.urmc.rochester.edu/

Works with 5.8.1; Jun 16, 2016.

Download http://rna.urmc.rochester.edu/RNAstructureDownload.html and untar it in ``<RNA_PDB_TOOLS>/opt/RNAstructure/``; run make, the tools will be compiled in a folder ``exe``. Set up ``DATPATH`` in your bashrc to ``<RNA_PDB_TOOLS>/opt/RNAstructure/data_tables`` ``DATAPATH=/home/magnus/work/src/rna-pdb-tools/opt/RNAstructure/data_tables/`` (read more http://rna.urmc.rochester.edu/Text/Thermodynamics.html). RNAstructure can be run with SHAPE restraints, read more http://rna.urmc.rochester.edu/Text/File_Formats.html#Constraint about the format. The file format for SHAPE reactivity comprises two columns. The first column is the nucleotide number, and the second is the reactivity. Nucleotides for which there is no SHAPE data can either be left out of the file, or the reactivity can be entered as less than -500. Columns are separated by any white space.

MC-Sym
^^^^^^^^^^^^^

FAQ
~~~~~~~~~~~~~

- Does it work for more than one chain??? Hmm.. I think it's not. You have to check on your own. --magnus

TIPS
~~~~~~~~~~~~~

Should you need to run it on a list of sequences, use the following script::

    from rna_tools import Seq
    f = open("listOfSequences.fasta")
    for line in f:
     if line.startswith('>'):
       print line,
     else:
       print line,
       s = Seq.Seq(line.strip()) # module first Seq and class second Seq #without strip this has two lines
       print s.predict_ss(method="contextfold"),
       #print s.predict_ss(method="centroid_fold")

TODO
~~~~~~~~~~~~~

- This calss should be renamed to RNASeq and merged with RNASeq class from RNAalignment

"""  # noqa
import os
import subprocess
import tempfile
import sys

from rna_tools.SecondaryStructure import parse_vienna_to_pairs
from rna_tools.rna_tools_config import CONTEXTFOLD_PATH, RNASTRUCTURE_PATH, ENTRNA_PATH, IPKNOT_PATH



[docs]
class MethodNotChosen(Exception):
    pass




[docs]
class RNASequence(object):
    """RNASequence.

    Usage::

        >>> seq = RNASequence("CCCCUUUUGGGG")
        >>> seq.name = 'RNA03'
        >>> print(seq.predict_ss("RNAfold", constraints="((((....))))"))
        >RNA03
        CCCCUUUUGGGG
        ((((....)))) ( -6.40)

    """

    def __init__(self, seq, ss='', name='rna_seq'):
        self.seq = seq
        self.ss = ss
        self.ss_log = ''
        self.name = name

    def __repr__(self):
        return self.name + '\n' + self.seq + '\n' + self.ss


[docs]
    def eval(self, ss='', no_dangling_end_energies=False, verbose=False):
        """Evaluate energy of RNA sequence.

        Args:
            ss (optional), if not set, then self.ss is taken for calc
            no_dangling_end_energies (Boolean)
            verbose (Boolean)

        Returns:
            Energy (float)

        The RNAeval web server calculates the energy of a RNA sequence on a given secondary structure.
        You can use it to get a detailed thermodynamic description (loop free-energy decomposition) of your RNA structures.

        Simply paste or upload your sequence below and click Proceed. To get more information on the meaning of the options click the help symbols. You can test the server using this sample sequence/structure pair.

        An equivalent RNAeval command line call would have been::

            RNAeval -v -d0 < input.txt

        Read more: <http://rna.tbi.univie.ac.at//cgi-bin/RNAWebSuite/RNAeval.cgi>

        """
        tf = tempfile.NamedTemporaryFile(delete=False)
        if not ss:
            ss = self.ss

        tf.name += '.fa'
        with open(tf.name, 'w') as f:
            f.write('>' + self.name + '\n')
            f.write(self.seq + '\n')
            f.write(ss + '\n')

        dopt = ' -d2 '
        if no_dangling_end_energies:
            dopt = ' -d0 '

        cmd = 'RNAeval ' + dopt + ' < ' + tf.name
        if verbose:
            print(cmd)
        self.ss_log = subprocess.check_output(cmd, shell=True).decode()
        # [u'>rna_seq\nGGCAGGGGCGCUUCGGCCCCCUAUGCC\n((((((((.((....)).)))).))))', u'(-13.50)']
        return float(self.ss_log.strip().split(' ')[-1].replace('(','').replace(')', ''))




[docs]
    def get_foldability(self, ss='', verbose=False):
        """Calculate foldability based on EntRNA.

        Steps:

        - parse SS into basepairs,
        - calculate foldabilty

        Configuration:

        - Set ENTRNA_PATH to the folder where ENTRNA_predict.py is.

        Cmd wrapper in here::

            python ENTRNA_predict.py --seq_file pseudoknotted_seq.txt --str_file pseudoknotted_str.txt

        Su, C., Weir, J. D., Zhang, F., Yan, H., & Wu, T. (2019).
        ENTRNA: a framework to predict RNA foldability. BMC Bioinformatics, 20(1), 1–11.
        http://doi.org/10.1186/s12859-019-2948-5
        """
        if ss:
            self.ss = ss

        # parse SS into base-pairs
        def dp_to_bp(dp):
            import numpy as np
            a_list = []
            bp_array = np.zeros(len(dp),dtype = int)
            for i in range(len(dp)):
                if dp[i] == "(":
                    a_list.append(i)
                if dp[i] == ")":
                    bp_array[i] = a_list[-1] + 1
                    bp_array[a_list[-1]] = i + 1
                    a_list.pop()
            return list(bp_array)

        bp = dp_to_bp(self.ss)
        if verbose: print(bp)

        tempstr = tempfile.NamedTemporaryFile(delete=False)
        with open(tempstr.name, 'w') as f:
            f.write(str(bp))

        tempseq = tempfile.NamedTemporaryFile(delete=False)
        with open(tempseq.name, 'w') as f:
            f.write(self.seq)

        # -W to silent warnings See [1]
        cmd = "cd " + ENTRNA_PATH + " && python -W ignore ENTRNA_predict.py --seq_file " + tempseq.name + " --str_file " + tempstr.name
        log = subprocess.check_output(cmd, shell=True).decode()
        if verbose:
            print(cmd)
            print(log)
        for l in log.split('\n'):
            if l.startswith('Foldability: '):
                return round(float(l.replace('Foldability: ', '')), 2)
        return -1

        ## [1]:
        ##     /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/util/pseudoknot_free.py:22: SettingWithCopyWarning:
        ## A value is trying to be set on a copy of a slice from a DataFrame.
        ## Try using .loc[row_indexer,col_indexer] = value instead

        ## See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
        ##   df_v1['length'] = df_v1['seq'].apply(lambda x:len(x))
        ## /home/magnus/miniconda2/lib/python2.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
        ##   FutureWarning)
        ## cd /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/ && python ENTRNA_predict.py --seq_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUORegp --str_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp1ERCcD


[docs]
    def predict_ss(self, method="RNAfold", constraints='', enforce_constraint=False, shapefn='', explore='', verbose=0, path=''):
        """Predict secondary structure of the seq.

        Args:
          method: {mcfold, RNAfold}
          onstraints:
          shapefn (str): path to a file with shape reactivites
          verbose (boolean)

        It creates a seq fasta file and runs various methods for secondary structure
        prediction. You can provide also a constraints file for RNAfold and RNAsubopt.

        Methods that can be used with contraints: RNAsubopt, RNAfold, mcfold.

        Methods that can be used with SHAPE contraints: RNAfold.

        **ContextFold**

        Example::

            $ java -cp bin contextFold.app.Predict in:CCCCUUUGGGGG
            CCCCUUUGGGGG
            ((((....))))

        It seems that a seq has to be longer than 9. Otherwise::

            $ java -cp bin contextFold.app.Predict in:UUUUUUGGG
            Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 10

            # this is OK
            $ java -cp bin contextFold.app.Predict in:CCCCUUUGGG
            CCCCUUUGGG
            .(((...)))


        **RNAstructure**

        Example::

            >>> seq = RNASequence("GGGGUUUUCCC")
            >>> print(seq.predict_ss("rnastructure"))
            >  ENERGY = -4.4  rna_seq
            GGGGUUUUCCC
            ((((...))))

        and with the shape data::

            >>> print(seq.predict_ss("rnastructure", shapefn="data/shape.txt"))
            >  ENERGY = -0.2  rna_seq
            GGGGUUUUCCC
            .(((....)))

        the shape data::

            1 10
            2 1
            3 1

        You can easily see that the first G is unpaired right now! The reactivity of this G was
        set to 10. Worked!


        **MC-Fold**

        MC-Fold uses the online version of the tool, this is very powerful with constraints::

            rna_seq
            acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg
            ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))
            curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            mcfold::energy best dynamics programming: -53.91
            (-53.91, '((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))')

            curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((........)))).......((((..............((((((((((..............))))))))))..))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            mcfold::energy best dynamics programming: -34.77
            (-34.77, '((((........)))).......((((..............((((((((((..............))))))))))..))))')

            acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg
            ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))
            curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((xxxxxxxx))))xxxxxxx((((xxxxxxxxxxxxxx((((((((((xxxxxxxxxxxxxx))))))))))xx))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            mcfold::energy best dynamics programming: -34.77
            (-34.77, '((((........)))).......((((..............((((((((((..............))))))))))..))))')


            acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg
            ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))
            curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((********))))*******((((**************((((((((((**************))))))))))**))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            mcfold::energy best dynamics programming: -77.30
            (-71.12, '(((((((..))))))).......((((((.(((...)))..(((((((((((((((....)))))))))))))))))))))')

            acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg
            ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))
            curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((**[[[[[**))))*******((((****]]]]]****(((((((((((((((****)))))))))))))))**))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            mcfold::energy best dynamics programming: -77.30
            ('-77.30', '((((**[[[[[**))))*******((((****]]]]]****(((((((((((((((****)))))))))))))))**))))')

        **explore**

         The sub-optimal search space can be constrained within a percentage of the minimum free energy structure, as MC-fold makes use of the Waterman-Byers algorithm [18, 19]. Because the exploration has an exponential time complexity, increasing this value can have a dramatic effect on MC-Fold’s run time.

        Parisien, M., & Major, F. (2009). RNA Modeling Using the MC-Fold and MC-Sym Pipeline [Manual] (pp. 1–84).


        """

        tf = tempfile.NamedTemporaryFile(delete=False)
        tf.name += '.fa'
        with open(tf.name, 'w') as f:
            f.write('>' + self.name + '\n')
            f.write(self.seq + '\n')
            if constraints:
                f.write(constraints)

        # check for seq and constraints
        if constraints:
            if len(self.seq) != len(constraints):
                raise Exception('The seq and constraints should be of the same length: %i %s %i %s' % (len(self.seq), self.seq, len(constraints), constraints))

        # run prediction
        # rnafold without contraints
        if method == "RNAfold" and constraints:
            cmd = 'RNAfold -C < ' + tf.name
            if verbose:
                print(cmd)
            self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            return '\n'.join(self.ss_log.strip().split('\n')[:])

        if method == "RNAfoldX" and constraints:
            if enforce_constraint:
                cmd = 'RNAfold -p -d2 --noLP -C --enforceConstraint < ' + tf.name
            else:
                cmd = 'RNAfold -p -d2 --noLP -C < ' + tf.name
            if verbose:
                print(cmd)

            try:
                self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            except subprocess.CalledProcessError:
                 print('Error')
                 return 0, 'error', 0, '', 0, '', 0, 0

            if verbose:
                print(self.ss_log)
            # parse the results
            lines = self.ss_log.split('\n')
            if 'Supplied structure constraints create empty solution set for sequence' in self.ss_log:
                return 0, 'Supplied structure constraints create empty solution set for sequence', 0, '', 0, '', 0, 0
            #if not 'frequency of mfe structure' in self.ss_log:

            # RNAfold -p -d2 --noLP -C < /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpGiUoo7.fa
            # >rna_seq
            # AAAUUAAGGGGAAGCGUUGAGCCGCUACCCAUAUGUGGUUCACUCGGAUAGCGGGGAGCUAAUAGUGAAACCGGCCCUUUAGGGG
            # ...((((((((.(((......((((((.((....(((...)))..)).))))))...)))..............))))))))... (-19.80)
            # ...{(((((((.(((......((((((.((....(((...)))..)).))))))...)))..............)))))))}... [-21.05]
            #...((((((((.(((......((((((.((....(((...)))..)).))))))...)))..............))))))))... {-19.80 d=2.34}
            # frequency of mfe structure in ensemble 0.131644; ensemble diversity 3.68

            mfess = lines[2].split()[0]
            mfe = ' '.join(lines[2].split()[-1:])
            mfe = float(mfe.replace('(', '').replace(')', '')) # (-19.80) ->-19.80

            efess = lines[3].split()[0]  # ensamble free energy
            efe = ' '.join(lines[3].split()[-1:])
            efe = float(efe.replace('[', '').replace(']', '')) # (-19.80) ->-19.80

            cfess = lines[4].split()[0]  # ensamble free energy
            cfe, d = ' '.join(lines[4].split()[1:]).split('d')
            cfe = float(cfe.replace('{', '').replace('}', '')) # (-19.80) ->-19.80

            words = lines[5].split()  # ensamble free energy
            freq = round(float(words[6].replace(';', '')), 2) # frequency of mfe structure in ensemble
            diversity = float(words[9])     # ensemble diversity

            if verbose:
                print(mfe, mfess, efe, efess, cfe, cfess, freq, diversity)
            return mfe, mfess, efe, efess, cfe, cfess, freq, diversity

        elif method == "RNAfold":
            cmd = 'RNAfold < ' + tf.name
            if verbose:
                print(cmd)
            self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            return '\n'.join(self.ss_log.strip().split('\n')[:])

        elif method == "RNAsubopt" and constraints:
            cmd = 'RNAsubopt -C < ' + tf.name
            if verbose:
                print(cmd)
            self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            return '\n'.join(self.ss_log.split('\n')[:])

        elif method == "RNAsubopt":
            cmd = 'RNAsubopt < ' + tf.name
            if verbose:
                print(cmd)
            self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            return '\n'.join(self.ss_log.split('\n')[:])

        elif method == "mcfold":
            #  -F tope=1
            if explore:
                explore_str = " -F explore=" + str(explore)
            else:
                explore_str = ''

            #if constraints:
            #cmd = "curl -Y 0 -y 300 -F \"pass=lucy\" -F mask=\"" + constraints + "\" " + explore_str + \
            #" -F sequence=\"" + self.seq + "\" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi"
            cmd = "curl https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi\?pass\=lucy\&sequence\=" + self.seq + "\&top\=20\&explore\=15\&name\=\&mask\='" + constraints + "'\&singlehigh\=\&singlemed\=\&singlelow\="
            # cmd = "curl -Y 0 -y 300 -F \"pass=lucy\" -F sequence=\"" + self.seq + "\" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi"
            if verbose:
                print(cmd)
                
            o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out = o.stdout.read().decode(errors='ignore').strip()
            err = o.stderr.read().decode(errors='ignore').strip()

            if verbose:
                print(out)

            # If the structure can't be find, detect this statement and finish this routine.
            if 'Explored 0 structures' in out:
                return 0.00, '', 'Explored 0 structures'

            comment = ''
            energy = ''
            out = out.split('\n')
            for l in out :
                # first you will find the best dynamic energy, and in the next loop
                # it will be used to search for lines with this energy and secondary
                # structure

                # (((..)))  -5.43
                if energy:  # if energy is set
                    if energy in l:
                        if verbose: print(l)
                        ss = l.split()[0]

                # Performing Dynamic Programming...
                # Best Dynamic Programming Solution has Energy:  -5.43
                if l.startswith('Best Dynamic Programming Solution has Energy:'):
                    energy_bdp = l.split(':')[1].strip()
                    if verbose:
                        print ('mcfold::energy best dynamics programming: ' + energy_bdp)
                        comment = 'energy best dynamics programming'
                    ss = constraints
                    # return float(energy), constraints # I'm not sure if this is good

            # Ok, for whatever reason Best DP energy might not be exactly the same as and
            # the energy listed later for secondary structure. So this code finds this secondary
            # structure and gets again the energy for this secondary structure,
            # and overwrites the previous energy.
            # In this case:
            # Best Dynamic Programming Solution has Energy:  -5.46
            # ...
            # CUCUCGAAAGAUG
            # (((.((..)))))  -5.44 ( +0.00)
            # (((.((..)))))  BP >=  50%

            # if evenn this will not find ss, then set ss to null not to crash
            # and it's possible, like in here
            # curl -Y 0 -y 300 -F "pass=lucy" -F mask="((******)))" -F sequence="CCUgcgcaAGG" \
            #            http://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi
            ss = ''
            for l in out:
                if 'target="_blank">MARNA</a>-formatted:<P><P><P></H2><pre>' in l:
                    index = out.index(l)
                    ss_line = out[index + 2]
                    ss, energy = ss_line.split()[0:2]  # '(((.((..)))))  -5.44 ( +0.00)'

                    # if there is
                    # UUGCCGUAAGACA
                    # .............  BP >=  50%
                    # then finish with energy 0.00, and empty ss
                    if energy == 'BP':
                        energy = energy_bdp
                        comment = 'BP energy'
                        return energy_bdp, constraints, comment
                    # break

            # prepare outputs, return and self-s
            self.log = out
            self.ss = ss
            return float(energy), ss, comment

        # if method == "RNAsubopt":
        #     from cogent.app.vienna_package import RNAfold, RNAsubopt
        #     r = RNAsubopt(WorkingDir="/tmp")
        #     res = r([self.seq])
        #     return str(res['StdOut'].read()).strip()

        # if method == 'RNAfold':
        #     from cogent.app.vienna_package import RNAfold, RNAsubopt
        #     r = RNAfold(WorkingDir="/tmp")
        #     res = r([self.seq])
        #     self.ss_log = res['StdOut'].read()
        #     return self.ss_log.strip().split('\n')[-1].split()[0]

        elif method == "ipknot":
            self.ss_log = subprocess.check_output(IPKNOT_PATH + ' ' + tf.name, shell=True)
            return '\n'.join(self.ss_log.decode().split('\n')[2:])

        elif method == "contextfold":
            if path:
                CONTEXTFOLD_PATH = path
            if not CONTEXTFOLD_PATH:
                print('Set up CONTEXTFOLD_PATH in configuration.')
                sys.exit(0)
            cmd = "cd " + CONTEXTFOLD_PATH + \
                  " && java -cp bin contextFold.app.Predict in:" + self.seq
            if verbose:
                print(cmd)
            self.ss_log = subprocess.check_output(cmd, shell=True).decode()
            return '\n'.join(self.ss_log.split('\n')[1:])

        elif method == "centroid_fold":
            self.ss_log = subprocess.check_output('centroid_fold ' + tf.name, shell=True)
            return '\n'.join(self.ss_log.split('\n')[2:])

        elif method == "rnastructure":
            cmd = RNASTRUCTURE_PATH + '/exe/Fold ' + tf.name + ' ' + tf.name + '.out '
            if shapefn:
                cmd += ' -sh ' + shapefn
            if verbose:
                print(cmd)
            o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stderr = o.stderr.read().strip()
            if stderr:
                print(stderr)

            cmd = RNASTRUCTURE_PATH + '/exe/ct2dot ' + tf.name + '.out 1 ' + \
                             tf.name + '.dot'
            if verbose:
                print(cmd)
            o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stderr = o.stderr.read().strip()
            if not stderr:
                with open(tf.name + '.dot') as f:
                    return f.read().strip()

        # (-51.15, '.(.(((((((((((((((..))))))))))))))))(..((((((((....)))).))))).')
        elif method == "rnastructure_CycleFold":
            cmd = RNASTRUCTURE_PATH + '/exe/CycleFold ' + tf.name + ' > ' + tf.name + '.ct '
            if verbose:
                print(cmd)
            o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stderr = o.stderr.read().strip()
            if stderr:
                print(stderr)

            # get energy
            energy = float(open(tf.name + '.ct').readline().split("energy:")[1].strip())  # >rna_seq	energy: -51.1500

            # get ss in dot-bracket notation
            cmd = RNASTRUCTURE_PATH + '/exe/ct2dot ' + tf.name + '.ct 1 ' + \
                             tf.name + '.dot'
            if verbose:
                print(cmd)
            o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stderr = o.stderr.read().strip()
            if not stderr:
                with open(tf.name + '.dot') as f:
                    # (-51.15, '.(.(((((((((((((((..))))))))))))))))(..((((((((....)))).))))).')
                    return energy, f.read().strip().split('\n')[2]


        else:
            raise MethodNotChosen('You have to define a correct method to use.')




# main


[docs]
def load_fasta_ss_into_RNAseqs(fn, debug=True):
    seqs = []
    with open(fn) as f:
        for line in f:
            if debug: print(line)
            name = line.replace('>', '').strip()
            seq = next(f).strip()
            ss = next(f).strip()
            rs = RNASequence(seq, ss, name)
            seqs.append(rs)
    return seqs


if __name__ == '__main__':
    #import doctest
    #doctest.testmod()

    seq = RNASequence('GAUCguaaGAUC')
    seq.name = 'RNA01'
    print(seq.predict_ss("mcfold"))
    sys.exit(1)

    seq = RNASequence("CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG")
    seq.name = 'RNA01'
    print(seq.predict_ss("RNAfold",
                          constraints="((((...............................................................))))"))  # noqa

    seq = RNASequence("CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG")
    seq.name = 'RNA02'
    print(seq.predict_ss("RNAsubopt",
                         constraints="((((...............................................................))))"))  # noqa

    print(seq.predict_ss("contextfold"))
    print(seq.predict_ss(method="ipknot"))

    verbose = False
    seq = RNASequence("GGGGUUUUCCC")
    print(seq.predict_ss("rnastructure", verbose=verbose))
    print(seq.predict_ss("rnastructure", shapefn="data/shape.txt", verbose=verbose))

    seq = RNASequence("CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAUAAAUAUCAGgUGCAA")
    print(seq.predict_ss("rnastructure", shapefn="data/shape.txt", verbose=verbose))

    #
    # test of MethodNotChose
    # print(seq.predict_ss("test"))