Source code for rna_tools.Seq

#!/usr/bin/env python3
#-*- coding: utf-8 -*-
"""RNA Sequence with secondary structure prediction methods.

This tool takes a given sequence and returns the secondary structure prediction provided by 5 different tools: RNAfold, RNAsubopt, ipknot, contextfold and centroid_fold. You must have these tools installed. You don't have to install all tools if you want to use only one of the methods.

It's easy to add more methods of your choince to this class.

Installation
~~~~~~~~~~~~~
Depends on what tools you want to use, follow the instructions below.

ContextFold
^^^^^^^^^^^^^^^^^^^^^
https://www.cs.bgu.ac.il/~negevcb/contextfold/

needs Java. Try this on Ubuntu 14-04 https://askubuntu.com/questions/521145/how-to-install-oracle-java-on-ubuntu-14-04 Single chain only!

ViennaRNA
^^^^^^^^^^^^^^
https://www.tbi.univie.ac.at/RNA/

For OSX install from the binary Installer from the page.

ipknot OSX
^^^^^^^^^^^^^
https://github.com/satoken/homebrew-rnatools

If one encounters a problem::

    [mm] Desktop$ /usr/local/opt/bin/ipknot
    dyld: Library not loaded: /usr/local/opt/glpk/lib/libglpk.40.dylib
      Referenced from: /usr/local/opt/bin/ipknot
      Reason: image not found
    [1]    51654 abort      /usr/local/opt/bin/ipknot

the solution is::

     brew install glpk # on OSX

RNA Structure
^^^^^^^^^^^^^
http://rna.urmc.rochester.edu/

Works with 5.8.1; Jun 16, 2016.

Download http://rna.urmc.rochester.edu/RNAstructureDownload.html and untar it in ``<RNA_PDB_TOOLS>/opt/RNAstructure/``; run make, the tools will be compiled in a folder ``exe``. Set up ``DATPATH`` in your bashrc to ``<RNA_PDB_TOOLS>/opt/RNAstructure/data_tables`` ``DATAPATH=/home/magnus/work/src/rna-pdb-tools/opt/RNAstructure/data_tables/`` (read more http://rna.urmc.rochester.edu/Text/Thermodynamics.html). RNAstructure can be run with SHAPE restraints, read more http://rna.urmc.rochester.edu/Text/File_Formats.html#Constraint about the format. The file format for SHAPE reactivity comprises two columns. The first column is the nucleotide number, and the second is the reactivity. Nucleotides for which there is no SHAPE data can either be left out of the file, or the reactivity can be entered as less than -500. Columns are separated by any white space.

MC-Sym
^^^^^^^^^^^^^

FAQ
~~~~~~~~~~~~~

- Does it work for more than one chain??? Hmm.. I think it's not. You have to check on your own. --magnus

TIPS
~~~~~~~~~~~~~

Should you need to run it on a list of sequences, use the following script::

    from rna_tools import Seq
    f = open("listOfSequences.fasta")
    for line in f:
     if line.startswith('>'):
       print line,
     else:
       print line,
       s = Seq.Seq(line.strip()) # module first Seq and class second Seq #without strip this has two lines
       print s.predict_ss(method="contextfold"),
       #print s.predict_ss(method="centroid_fold")

TODO
~~~~~~~~~~~~~

- This calss should be renamed to RNASeq and merged with RNASeq class from RNAalignment

"""  # noqa
import os
import subprocess
import tempfile
import sys

from rna_tools.SecondaryStructure import parse_vienna_to_pairs
from rna_tools.rna_tools_config import CONTEXTFOLD_PATH, RNASTRUCTURE_PATH, ENTRNA_PATH, IPKNOT_PATH


[docs] class MethodNotChosen(Exception): pass
[docs] class RNASequence(object): """RNASequence. Usage:: >>> seq = RNASequence("CCCCUUUUGGGG") >>> seq.name = 'RNA03' >>> print(seq.predict_ss("RNAfold", constraints="((((....))))")) >RNA03 CCCCUUUUGGGG ((((....)))) ( -6.40) """ def __init__(self, seq, ss='', name='rna_seq'): self.seq = seq self.ss = ss self.ss_log = '' self.name = name def __repr__(self): return self.name + '\n' + self.seq + '\n' + self.ss
[docs] def eval(self, ss='', no_dangling_end_energies=False, verbose=False): """Evaluate energy of RNA sequence. Args: ss (optional), if not set, then self.ss is taken for calc no_dangling_end_energies (Boolean) verbose (Boolean) Returns: Energy (float) The RNAeval web server calculates the energy of a RNA sequence on a given secondary structure. You can use it to get a detailed thermodynamic description (loop free-energy decomposition) of your RNA structures. Simply paste or upload your sequence below and click Proceed. To get more information on the meaning of the options click the help symbols. You can test the server using this sample sequence/structure pair. An equivalent RNAeval command line call would have been:: RNAeval -v -d0 < input.txt Read more: <http://rna.tbi.univie.ac.at//cgi-bin/RNAWebSuite/RNAeval.cgi> """ tf = tempfile.NamedTemporaryFile(delete=False) if not ss: ss = self.ss tf.name += '.fa' with open(tf.name, 'w') as f: f.write('>' + self.name + '\n') f.write(self.seq + '\n') f.write(ss + '\n') dopt = ' -d2 ' if no_dangling_end_energies: dopt = ' -d0 ' cmd = 'RNAeval ' + dopt + ' < ' + tf.name if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() # [u'>rna_seq\nGGCAGGGGCGCUUCGGCCCCCUAUGCC\n((((((((.((....)).)))).))))', u'(-13.50)'] return float(self.ss_log.strip().split(' ')[-1].replace('(','').replace(')', ''))
[docs] def get_foldability(self, ss='', verbose=False): """Calculate foldability based on EntRNA. Steps: - parse SS into basepairs, - calculate foldabilty Configuration: - Set ENTRNA_PATH to the folder where ENTRNA_predict.py is. Cmd wrapper in here:: python ENTRNA_predict.py --seq_file pseudoknotted_seq.txt --str_file pseudoknotted_str.txt Su, C., Weir, J. D., Zhang, F., Yan, H., & Wu, T. (2019). ENTRNA: a framework to predict RNA foldability. BMC Bioinformatics, 20(1), 1–11. http://doi.org/10.1186/s12859-019-2948-5 """ if ss: self.ss = ss # parse SS into base-pairs def dp_to_bp(dp): import numpy as np a_list = [] bp_array = np.zeros(len(dp),dtype = int) for i in range(len(dp)): if dp[i] == "(": a_list.append(i) if dp[i] == ")": bp_array[i] = a_list[-1] + 1 bp_array[a_list[-1]] = i + 1 a_list.pop() return list(bp_array) bp = dp_to_bp(self.ss) if verbose: print(bp) tempstr = tempfile.NamedTemporaryFile(delete=False) with open(tempstr.name, 'w') as f: f.write(str(bp)) tempseq = tempfile.NamedTemporaryFile(delete=False) with open(tempseq.name, 'w') as f: f.write(self.seq) # -W to silent warnings See [1] cmd = "cd " + ENTRNA_PATH + " && python -W ignore ENTRNA_predict.py --seq_file " + tempseq.name + " --str_file " + tempstr.name log = subprocess.check_output(cmd, shell=True).decode() if verbose: print(cmd) print(log) for l in log.split('\n'): if l.startswith('Foldability: '): return round(float(l.replace('Foldability: ', '')), 2) return -1
## [1]: ## /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/util/pseudoknot_free.py:22: SettingWithCopyWarning: ## A value is trying to be set on a copy of a slice from a DataFrame. ## Try using .loc[row_indexer,col_indexer] = value instead ## See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy ## df_v1['length'] = df_v1['seq'].apply(lambda x:len(x)) ## /home/magnus/miniconda2/lib/python2.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. ## FutureWarning) ## cd /Users/magnus/work/evoClustRNA/rna-foldability/ENTRNA/ && python ENTRNA_predict.py --seq_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUORegp --str_file /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp1ERCcD
[docs] def predict_ss(self, method="RNAfold", constraints='', enforce_constraint=False, shapefn='', explore='', verbose=0, path=''): """Predict secondary structure of the seq. Args: method: {mcfold, RNAfold} onstraints: shapefn (str): path to a file with shape reactivites verbose (boolean) It creates a seq fasta file and runs various methods for secondary structure prediction. You can provide also a constraints file for RNAfold and RNAsubopt. Methods that can be used with contraints: RNAsubopt, RNAfold, mcfold. Methods that can be used with SHAPE contraints: RNAfold. **ContextFold** Example:: $ java -cp bin contextFold.app.Predict in:CCCCUUUGGGGG CCCCUUUGGGGG ((((....)))) It seems that a seq has to be longer than 9. Otherwise:: $ java -cp bin contextFold.app.Predict in:UUUUUUGGG Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 10 # this is OK $ java -cp bin contextFold.app.Predict in:CCCCUUUGGG CCCCUUUGGG .(((...))) **RNAstructure** Example:: >>> seq = RNASequence("GGGGUUUUCCC") >>> print(seq.predict_ss("rnastructure")) > ENERGY = -4.4 rna_seq GGGGUUUUCCC ((((...)))) and with the shape data:: >>> print(seq.predict_ss("rnastructure", shapefn="data/shape.txt")) > ENERGY = -0.2 rna_seq GGGGUUUUCCC .(((....))) the shape data:: 1 10 2 1 3 1 You can easily see that the first G is unpaired right now! The reactivity of this G was set to 10. Worked! **MC-Fold** MC-Fold uses the online version of the tool, this is very powerful with constraints:: rna_seq acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..)))) curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi mcfold::energy best dynamics programming: -53.91 (-53.91, '((((........)))).......((((..............(((((((((((((((....)))))))))))))))..))))') curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((........)))).......((((..............((((((((((..............))))))))))..))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi mcfold::energy best dynamics programming: -34.77 (-34.77, '((((........)))).......((((..............((((((((((..............))))))))))..))))') acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..)))) curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((xxxxxxxx))))xxxxxxx((((xxxxxxxxxxxxxx((((((((((xxxxxxxxxxxxxx))))))))))xx))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi mcfold::energy best dynamics programming: -34.77 (-34.77, '((((........)))).......((((..............((((((((((..............))))))))))..))))') acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..)))) curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((********))))*******((((**************((((((((((**************))))))))))**))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi mcfold::energy best dynamics programming: -77.30 (-71.12, '(((((((..))))))).......((((((.(((...)))..(((((((((((((((....)))))))))))))))))))))') acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg ((((........)))).......((((..............(((((((((((((((....)))))))))))))))..)))) curl -Y 0 -y 300 -F "pass=lucy" -F mask="((((**[[[[[**))))*******((((****]]]]]****(((((((((((((((****)))))))))))))))**))))" -F sequence="acucggcuaggcgaguauaaauagccgucaggccuagcgcguccaagccuagccccuucuggggcugggcgaagggucggg" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi mcfold::energy best dynamics programming: -77.30 ('-77.30', '((((**[[[[[**))))*******((((****]]]]]****(((((((((((((((****)))))))))))))))**))))') **explore** The sub-optimal search space can be constrained within a percentage of the minimum free energy structure, as MC-fold makes use of the Waterman-Byers algorithm [18, 19]. Because the exploration has an exponential time complexity, increasing this value can have a dramatic effect on MC-Fold’s run time. Parisien, M., & Major, F. (2009). RNA Modeling Using the MC-Fold and MC-Sym Pipeline [Manual] (pp. 1–84). """ tf = tempfile.NamedTemporaryFile(delete=False) tf.name += '.fa' with open(tf.name, 'w') as f: f.write('>' + self.name + '\n') f.write(self.seq + '\n') if constraints: f.write(constraints) # check for seq and constraints if constraints: if len(self.seq) != len(constraints): raise Exception('The seq and constraints should be of the same length: %i %s %i %s' % (len(self.seq), self.seq, len(constraints), constraints)) # run prediction # rnafold without contraints if method == "RNAfold" and constraints: cmd = 'RNAfold -C < ' + tf.name if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() return '\n'.join(self.ss_log.strip().split('\n')[:]) if method == "RNAfoldX" and constraints: if enforce_constraint: cmd = 'RNAfold -p -d2 --noLP -C --enforceConstraint < ' + tf.name else: cmd = 'RNAfold -p -d2 --noLP -C < ' + tf.name if verbose: print(cmd) try: self.ss_log = subprocess.check_output(cmd, shell=True).decode() except subprocess.CalledProcessError: print('Error') return 0, 'error', 0, '', 0, '', 0, 0 if verbose: print(self.ss_log) # parse the results lines = self.ss_log.split('\n') if 'Supplied structure constraints create empty solution set for sequence' in self.ss_log: return 0, 'Supplied structure constraints create empty solution set for sequence', 0, '', 0, '', 0, 0 #if not 'frequency of mfe structure' in self.ss_log: # RNAfold -p -d2 --noLP -C < /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpGiUoo7.fa # >rna_seq # AAAUUAAGGGGAAGCGUUGAGCCGCUACCCAUAUGUGGUUCACUCGGAUAGCGGGGAGCUAAUAGUGAAACCGGCCCUUUAGGGG # ...((((((((.(((......((((((.((....(((...)))..)).))))))...)))..............))))))))... (-19.80) # ...{(((((((.(((......((((((.((....(((...)))..)).))))))...)))..............)))))))}... [-21.05] #...((((((((.(((......((((((.((....(((...)))..)).))))))...)))..............))))))))... {-19.80 d=2.34} # frequency of mfe structure in ensemble 0.131644; ensemble diversity 3.68 mfess = lines[2].split()[0] mfe = ' '.join(lines[2].split()[-1:]) mfe = float(mfe.replace('(', '').replace(')', '')) # (-19.80) ->-19.80 efess = lines[3].split()[0] # ensamble free energy efe = ' '.join(lines[3].split()[-1:]) efe = float(efe.replace('[', '').replace(']', '')) # (-19.80) ->-19.80 cfess = lines[4].split()[0] # ensamble free energy cfe, d = ' '.join(lines[4].split()[1:]).split('d') cfe = float(cfe.replace('{', '').replace('}', '')) # (-19.80) ->-19.80 words = lines[5].split() # ensamble free energy freq = round(float(words[6].replace(';', '')), 2) # frequency of mfe structure in ensemble diversity = float(words[9]) # ensemble diversity if verbose: print(mfe, mfess, efe, efess, cfe, cfess, freq, diversity) return mfe, mfess, efe, efess, cfe, cfess, freq, diversity elif method == "RNAfold": cmd = 'RNAfold < ' + tf.name if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() return '\n'.join(self.ss_log.strip().split('\n')[:]) elif method == "RNAsubopt" and constraints: cmd = 'RNAsubopt -C < ' + tf.name if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() return '\n'.join(self.ss_log.split('\n')[:]) elif method == "RNAsubopt": cmd = 'RNAsubopt < ' + tf.name if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() return '\n'.join(self.ss_log.split('\n')[:]) elif method == "mcfold": # -F tope=1 if explore: explore_str = " -F explore=" + str(explore) else: explore_str = '' #if constraints: #cmd = "curl -Y 0 -y 300 -F \"pass=lucy\" -F mask=\"" + constraints + "\" " + explore_str + \ #" -F sequence=\"" + self.seq + "\" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi" cmd = "curl https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi\?pass\=lucy\&sequence\=" + self.seq + "\&top\=20\&explore\=15\&name\=\&mask\='" + constraints + "'\&singlehigh\=\&singlemed\=\&singlelow\=" # cmd = "curl -Y 0 -y 300 -F \"pass=lucy\" -F sequence=\"" + self.seq + "\" https://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi" if verbose: print(cmd) o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out = o.stdout.read().decode(errors='ignore').strip() err = o.stderr.read().decode(errors='ignore').strip() if verbose: print(out) # If the structure can't be find, detect this statement and finish this routine. if 'Explored 0 structures' in out: return 0.00, '', 'Explored 0 structures' comment = '' energy = '' out = out.split('\n') for l in out : # first you will find the best dynamic energy, and in the next loop # it will be used to search for lines with this energy and secondary # structure # (((..))) -5.43 if energy: # if energy is set if energy in l: if verbose: print(l) ss = l.split()[0] # Performing Dynamic Programming... # Best Dynamic Programming Solution has Energy: -5.43 if l.startswith('Best Dynamic Programming Solution has Energy:'): energy_bdp = l.split(':')[1].strip() if verbose: print ('mcfold::energy best dynamics programming: ' + energy_bdp) comment = 'energy best dynamics programming' ss = constraints # return float(energy), constraints # I'm not sure if this is good # Ok, for whatever reason Best DP energy might not be exactly the same as and # the energy listed later for secondary structure. So this code finds this secondary # structure and gets again the energy for this secondary structure, # and overwrites the previous energy. # In this case: # Best Dynamic Programming Solution has Energy: -5.46 # ... # CUCUCGAAAGAUG # (((.((..))))) -5.44 ( +0.00) # (((.((..))))) BP >= 50% # if evenn this will not find ss, then set ss to null not to crash # and it's possible, like in here # curl -Y 0 -y 300 -F "pass=lucy" -F mask="((******)))" -F sequence="CCUgcgcaAGG" \ # http://www.major.iric.ca/cgi-bin/MC-Fold/mcfold.static.cgi ss = '' for l in out: if 'target="_blank">MARNA</a>-formatted:<P><P><P></H2><pre>' in l: index = out.index(l) ss_line = out[index + 2] ss, energy = ss_line.split()[0:2] # '(((.((..))))) -5.44 ( +0.00)' # if there is # UUGCCGUAAGACA # ............. BP >= 50% # then finish with energy 0.00, and empty ss if energy == 'BP': energy = energy_bdp comment = 'BP energy' return energy_bdp, constraints, comment # break # prepare outputs, return and self-s self.log = out self.ss = ss return float(energy), ss, comment # if method == "RNAsubopt": # from cogent.app.vienna_package import RNAfold, RNAsubopt # r = RNAsubopt(WorkingDir="/tmp") # res = r([self.seq]) # return str(res['StdOut'].read()).strip() # if method == 'RNAfold': # from cogent.app.vienna_package import RNAfold, RNAsubopt # r = RNAfold(WorkingDir="/tmp") # res = r([self.seq]) # self.ss_log = res['StdOut'].read() # return self.ss_log.strip().split('\n')[-1].split()[0] elif method == "ipknot": self.ss_log = subprocess.check_output(IPKNOT_PATH + ' ' + tf.name, shell=True) return '\n'.join(self.ss_log.decode().split('\n')[2:]) elif method == "contextfold": if path: CONTEXTFOLD_PATH = path if not CONTEXTFOLD_PATH: print('Set up CONTEXTFOLD_PATH in configuration.') sys.exit(0) cmd = "cd " + CONTEXTFOLD_PATH + \ " && java -cp bin contextFold.app.Predict in:" + self.seq if verbose: print(cmd) self.ss_log = subprocess.check_output(cmd, shell=True).decode() return '\n'.join(self.ss_log.split('\n')[1:]) elif method == "centroid_fold": self.ss_log = subprocess.check_output('centroid_fold ' + tf.name, shell=True) return '\n'.join(self.ss_log.split('\n')[2:]) elif method == "rnastructure": cmd = RNASTRUCTURE_PATH + '/exe/Fold ' + tf.name + ' ' + tf.name + '.out ' if shapefn: cmd += ' -sh ' + shapefn if verbose: print(cmd) o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stderr = o.stderr.read().strip() if stderr: print(stderr) cmd = RNASTRUCTURE_PATH + '/exe/ct2dot ' + tf.name + '.out 1 ' + \ tf.name + '.dot' if verbose: print(cmd) o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stderr = o.stderr.read().strip() if not stderr: with open(tf.name + '.dot') as f: return f.read().strip() # (-51.15, '.(.(((((((((((((((..))))))))))))))))(..((((((((....)))).))))).') elif method == "rnastructure_CycleFold": cmd = RNASTRUCTURE_PATH + '/exe/CycleFold ' + tf.name + ' > ' + tf.name + '.ct ' if verbose: print(cmd) o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stderr = o.stderr.read().strip() if stderr: print(stderr) # get energy energy = float(open(tf.name + '.ct').readline().split("energy:")[1].strip()) # >rna_seq energy: -51.1500 # get ss in dot-bracket notation cmd = RNASTRUCTURE_PATH + '/exe/ct2dot ' + tf.name + '.ct 1 ' + \ tf.name + '.dot' if verbose: print(cmd) o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stderr = o.stderr.read().strip() if not stderr: with open(tf.name + '.dot') as f: # (-51.15, '.(.(((((((((((((((..))))))))))))))))(..((((((((....)))).))))).') return energy, f.read().strip().split('\n')[2] else: raise MethodNotChosen('You have to define a correct method to use.')
# main
[docs] def load_fasta_ss_into_RNAseqs(fn, debug=True): seqs = [] with open(fn) as f: for line in f: if debug: print(line) name = line.replace('>', '').strip() seq = next(f).strip() ss = next(f).strip() rs = RNASequence(seq, ss, name) seqs.append(rs) return seqs
if __name__ == '__main__': #import doctest #doctest.testmod() seq = RNASequence('GAUCguaaGAUC') seq.name = 'RNA01' print(seq.predict_ss("mcfold")) sys.exit(1) seq = RNASequence("CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG") seq.name = 'RNA01' print(seq.predict_ss("RNAfold", constraints="((((...............................................................))))")) # noqa seq = RNASequence("CGCUUCAUAUAAUCCUAAUGAUAUGGUUUGGGAGUUUCUACCAAGAGCCUUAAACUCUUGAUUAUGAAGUG") seq.name = 'RNA02' print(seq.predict_ss("RNAsubopt", constraints="((((...............................................................))))")) # noqa print(seq.predict_ss("contextfold")) print(seq.predict_ss(method="ipknot")) verbose = False seq = RNASequence("GGGGUUUUCCC") print(seq.predict_ss("rnastructure", verbose=verbose)) print(seq.predict_ss("rnastructure", shapefn="data/shape.txt", verbose=verbose)) seq = RNASequence("CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAUAAAUAUCAGgUGCAA") print(seq.predict_ss("rnastructure", shapefn="data/shape.txt", verbose=verbose)) # # test of MethodNotChose # print(seq.predict_ss("test"))