mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Adding back in all input files we had before
This commit is contained in:
parent
45cc9ede3e
commit
1c84a234ec
18 changed files with 67280 additions and 0 deletions
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
File diff suppressed because it is too large
Load diff
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
|
@ -0,0 +1,265 @@
|
||||||
|
>seq001
|
||||||
|
MAGKRKRANAPDQTERRSSVRVQKVRQKALDEKARLVQERVKLLSDRKSEICVDDTELHE
|
||||||
|
KEEENVDGSPKRRSPPKLTAMQKGKQKLSVSLNGKDVNLEPHLKVTKCLRLFNKQYLLCV
|
||||||
|
QAKLSRPDLKGVTEMIKAKAILYPRKIIGDLPGIDVGHRFFSRAEMCAVGFHNHWLNGID
|
||||||
|
YMSMEYEKEYSNYKLPLAVSIVMSGQYEDDLDNADTVTYTGQGGHNLTGNKRQIKDQLLE
|
||||||
|
RGNLALKHCCEYNVPVRVTRGHNCKSSYTKRVYTYDGLYKVEKFWAQKGVSGFTVYKYRL
|
||||||
|
KRLEGQPELTTDQVNFVAGRIPTSTSEIEGLVCEDISGGLEFKGIPATNRVDDSPVSPTS
|
||||||
|
GFTYIKSLIIEPNVIIPKSSTGCNCRGSCTDSKKCACAKLNGGNFPYVDLNDGRLIESRD
|
||||||
|
VVFECGPHCGCGPKCVNRTSQKRLRFNLEVFRSAKKGWAVRSWEYIPAGSPVCEYIGVVR
|
||||||
|
RTADVDTISDNEYIFEIDCQQTMQGLGGRQRRLRDVAVPMNNGVSQSSEDENAPEFCIDA
|
||||||
|
GSTGNFARFINHSCEPNLFVQCVLSSHQDIRLARVVLFAADNISPMQELTYDYGYALDSV
|
||||||
|
HGPDGKVKQLACYCGALNCRKRLY
|
||||||
|
>seq002
|
||||||
|
MPRHFGAVPGVVPGMAFVNRQELRDAGVHLPTQAGISGSASEGADSIVLSGGYEDDRDEG
|
||||||
|
DVILYTGEGGRDPLTGHQVKPQQLVRGNLALAISHRDGLPLRVTRGHRHSSQFSPQSGYQ
|
||||||
|
YAGLYRVDDHWREVGRSGFLIWRFRLTRLENQDAHHAGADPQHPDSQHPERRPTLVQRIV
|
||||||
|
RDTATARAVKALYDHRCQVCGERLETPAGAYAEAAHIRPLGAPHHGPDVAGNILCLCPNH
|
||||||
|
HVLFDFGAFSVGDDLRLLGLPGRLHVHPQHAVDREHLAYHRRHYALQAGLEWGCSVPLT
|
||||||
|
>seq003
|
||||||
|
MGVMENLMVHTEISKVKSQSNGEVEKRGVSVLENGGVCKLDRMSGLKFKRRKVFAVRDFP
|
||||||
|
PGCGSRAMEVKIACENGNVVEDVKVVESLVKEEESLGQRDASENVSDIRMAEPVEVQPLR
|
||||||
|
ICLPGGDVVRDLSVTAGDECSNSEQIVAGSGVSSSSGTENIVRDIVVYADESSLGMDNLD
|
||||||
|
QTQPLEIEMSDVAVAKPRLVAGRKKAKKGIACHSSLKVVSREFGEGSRKKKSKKNLYWRD
|
||||||
|
RESLDSPEQLRILGVGTSSGSSSGDSSRNKVKETLRLFHGVCRKILQEDEAKPEDQRRKG
|
||||||
|
KGLRIDFEASTILKRNGKFLNSGVHILGEVPGVEVGDEFQYRMELNILGIHKPSQAGIDY
|
||||||
|
MKYGKAKVATSIVASGGYDDHLDNSDVLTYTGQGGNVMQVKKKGEELKEPEDQKLITGNL
|
||||||
|
ALATSIEKQTPVRVIRGKHKSTHDKSKGGNYVYDGLYLVEKYWQQVGSHGMNVFKFQLRR
|
||||||
|
IPGQPELSWVEVKKSKSKYREGLCKLDISEGKEQSPISAVNEIDDEKPPLFTYTVKLIYP
|
||||||
|
DWCRPVPPKSCCCTTRCTEAEARVCACVEKNGGEIPYNFDGAIVGAKPTIYECGPLCKCP
|
||||||
|
SSCYLRVTQHGIKLPLEIFKTKSRGWGVRCLKSIPIGSFICEYVGELLEDSEAERRIGND
|
||||||
|
EYLFDIGNRYDNSLAQGMSELMLGTQAGRSMAEGDESSGFTIDAASKGNVGRFINHSCSP
|
||||||
|
NLYAQNVLYDHEDSRIPHVMFFAQDNIPPLQELCYDYNYALDQVRDSKGNIKQKPCFCGA
|
||||||
|
AVCRRRLY
|
||||||
|
>seq004
|
||||||
|
MSTLLPFPDLNLMPDSQSSTAGTTAGDTVVTGKLEVKSEPIEEWQTPPSSTSDQSANTDL
|
||||||
|
IAEFIRISELFRSAFKPLQVKGLDGVSVYGLDSGAIVAVPEKENRELIEPPPGFKDNRVS
|
||||||
|
TVVVSPKFERPRELARIAILGHEQRKELRQVMKRTRMTYESLRIHLMAESMKNHVLGQGR
|
||||||
|
RRRSDMAAAYIMRDRGLWLNYDKHIVGPVTGVEVGDIFFYRMELCVLGLHGQTQAGIDCL
|
||||||
|
TAERSATGEPIATSIVVSGGYEDDEDTGDVLVYTGHGGQDHQHKQCDNQRLVGGNLGMER
|
||||||
|
SMHYGIEVRVIRGIKYENSISSKVYVYDGLYKIVDWWFAVGKSGFGVFKFRLVRIEGQPM
|
||||||
|
MGSAVMRFAQTLRNKPSMVRPTGYVSFDLSNKKENVPVFLYNDVDGDQEPRHYEYIAKAV
|
||||||
|
FPPGIFGQGGISRTGCECKLSCTDDCLCARKNGGEFAYDDNGHLLKGKHVVFECGEFCTC
|
||||||
|
GPSCKSRVTQKGLRNRLEVFRSKETGWGVRTLDLIEAGAFICEYAGVVVTRLQAEILSMN
|
||||||
|
GDVMVYPGRFTDQWRNWGDLSQVYPDFVRPNYPSLPPLDFSMDVSRMRNVACYISHSKEP
|
||||||
|
NVMVQFVLHDHNHLMFPRVMLFALENISPLAELSLDYGLADEVNGKLAICN
|
||||||
|
>seq005
|
||||||
|
MVHSESSILSSLRGGDGGGIPCSKDELAINGSYTDPMGRRKSKRFKVAAESEFSPDFGSI
|
||||||
|
TRQLRSRRMQKEFTVETYETRNVSDVCVLSSQADVELIPGEIVAERDSFKSVDCNDMSVG
|
||||||
|
LTEGAESLGVNMQEPMKDRNMPENTSEQNMVEVHPPSISLPEEDMMGSVCRKSITGTKEL
|
||||||
|
HGRTISVGRDLSPNMGSKFSKNGKTAKRSISVEEENLVLEKSDSGDHLGPSPEVLELEKS
|
||||||
|
EVWIITDKGVVMPSPVKPSEKRNGDYGEGSMRKNSERVALDKKRLASKFRLSNGGLPSCS
|
||||||
|
SSGDSARYKVKETMRLFHETCKKIMQEEEARPRKRDGGNFKVVCEASKILKSKGKNLYSG
|
||||||
|
TQIIGTVPGVEVGDEFQYRMELNLLGIHRPSQSGIDYMKDDGGELVATSIVSSGGYNDVL
|
||||||
|
DNSDVLIYTGQGGNVGKKKNNEPPKDQQLVTGNLALKNSINKKNPVRVIRGIKNTTLQSS
|
||||||
|
VVAKNYVYDGLYLVEEYWEETGSHGKLVFKFKLRRIPGQPELPWKEVAKSKKSEFRDGLC
|
||||||
|
NVDITEGKETLPICAVNNLDDEKPPPFIYTAKMIYPDWCRPIPPKSCGCTNGCSKSKNCA
|
||||||
|
CIVKNGGKIPYYDGAIVEIKPLVYECGPHCKCPPSCNMRVSQHGIKIKLEIFKTESRGWG
|
||||||
|
VRSLESIPIGSFICEYAGELLEDKQAESLTGKDEYLFDLGDEDDPFTINAAQKGNIGRFI
|
||||||
|
NHSCSPNLYAQDVLYDHEEIRIPHIMFFALDNIPPLQELSYDYNYKIDQVYDSNGNIKKK
|
||||||
|
FCYCGSAECSGRLY
|
||||||
|
>seq006
|
||||||
|
MERNGGHYTDKTRVLDIKPLRTLRPVFPSGNQAPPFVCAPPFGPFPPGFSSFYPFSSSQA
|
||||||
|
NQHTPDLNQAQYPPQHQQPQNPPPVYQQQPPQHASEPSLVTPLRSFRSPDVSNGNAELEG
|
||||||
|
STVKRRIPKKRPISRPENMNFESGINVADRENGNRELVLSVLMRFDALRRRFAQLEDAKE
|
||||||
|
AVSGIIKRPDLKSGSTCMGRGVRTNTKKRPGIVPGVEIGDVFFFRFEMCLVGLHSPSMAG
|
||||||
|
IDYLVVKGETEEEPIATSIVSSGYYDNDEGNPDVLIYTGQGGNADKDKQSSDQKLERGNL
|
||||||
|
ALEKSLRRDSAVRVIRGLKEASHNAKIYIYDGLYEIKESWVEKGKSGHNTFKYKLVRAPG
|
||||||
|
QPPAFASWTAIQKWKTGVPSRQGLILPDMTSGVESIPVSLVNEVDTDNGPAYFTYSTTVK
|
||||||
|
YSESFKLMQPSFGCDCANLCKPGNLDCHCIRKNGGDFPYTGNGILVSRKPMIYECSPSCP
|
||||||
|
CSTCKNKVTQMGVKVRLEVFKTANRGWGLRSWDAIRAGSFICIYVGEAKDKSKVQQTMAN
|
||||||
|
DDYTFDTTNVYNPFKWNYEPGLADEDACEEMSEESEIPLPLIISAKNVGNVARFMNHSCS
|
||||||
|
PNVFWQPVSYENNSQLFVHVAFFAISHIPPMTELTYDYGVSRPSGTQNGNPLYGKRKCFC
|
||||||
|
GSAYCRGSFG
|
||||||
|
>seq007
|
||||||
|
MQGVPGFNTVPNPNHYDKSIVLDIKPLRSLKPVFPNGNQGPPFVGCPPFGPSSSEYSSFF
|
||||||
|
PFGAQQPTHDTPDLNQTQNTPIPSFVPPLRSYRTPTKTNGPSSSSGTKRGVGRPKGTTSV
|
||||||
|
KKKEKKTVANEPNLDVQVVKKFSSDFDSGISAAEREDGNAYLVSSVLMRFDAVRRRLSQV
|
||||||
|
EFTKSATSKAAGTLMSNGVRTNMKKRVGTVPGIEVGDIFFSRIEMCLVGLHMQTMAGIDY
|
||||||
|
IISKAGSDEESLATSIVSSGRYEGEAQDPESLIYSGQGGNADKNRQASDQKLERGNLALE
|
||||||
|
NSLRKGNGVRVVRGEEDAASKTGKIYIYDGLYSISESWVEKGKSGCNTFKYKLVRQPGQP
|
||||||
|
PAFGFWKSVQKWKEGLTTRPGLILPDLTSGAESKPVSLVNDVDEDKGPAYFTYTSSLKYS
|
||||||
|
ETFKLTQPVIGCSCSGSCSPGNHNCSCIRKNDGDLPYLNGVILVSRRPVIYECGPTCPCH
|
||||||
|
ASCKNRVIQTGLKSRLEVFKTRNRGWGLRSWDSLRAGSFICEYAGEVKDNGNLRGNQEED
|
||||||
|
AYVFDTSRVFNSFKWNYEPELVDEDPSTEVPEEFNLPSPLLISAKKFGNVARFMNHSCSP
|
||||||
|
NVFWQPVIREGNGESVIHIAFFAMRHIPPMAELTYDYGISPTSEARDESLLHGQRTCLCG
|
||||||
|
SEQCRGSFG
|
||||||
|
>seq008
|
||||||
|
MGSSHIPLDPSLNPSPSLIPKLEPVTESTQNLAFQLPNTNPQALISSAVSDFNEATDFSS
|
||||||
|
DYNTVAESARSAFAQRLQRHDDVAVLDSLTGAIVPVEENPEPEPNPYSTSDSSPSVATQR
|
||||||
|
PRPQPRSSELVRITDVGPESERQFREHVRKTRMIYDSLRMFLMMEEAKRNGVGGRRARAD
|
||||||
|
GKAGKAGSMMRDCMLWMNRDKRIVGSIPGVQVGDIFFFRFELCVMGLHGHPQSGIDFLTG
|
||||||
|
SLSSNGEPIATSVIVSGGYEDDDDQGDVIMYTGQGGQDRLGRQAEHQRLEGGNLAMERSM
|
||||||
|
YYGIEVRVIRGLKYENEVSSRVYVYDGLFRIVDSWFDVGKSGFGVFKYRLERIEGQAEMG
|
||||||
|
SSVLKFARTLKTNPLSVRPRGYINFDISNGKENVPVYLFNDIDSDQEPLYYEYLAQTSFP
|
||||||
|
PGLFVQQSGNASGCDCVNGCGSGCLCEAKNSGEIAYDYNGTLIRQKPLIHECGSACQCPP
|
||||||
|
SCRNRVTQKGLRNRLEVFRSLETGWGVRSLDVLHAGAFICEYAGVALTREQANILTMNGD
|
||||||
|
TLVYPARFSSARWEDWGDLSQVLADFERPSYPDIPPVDFAMDVSKMRNVACYISHSTDPN
|
||||||
|
VIVQFVLHDHNSLMFPRVMLFAAENIPPMTELSLDYGVVDDWNAKLAICN
|
||||||
|
>seq009
|
||||||
|
MDKSIPIKAIPVACVRPDLVDDVTKNTSTIPTMVSPVLTNMPSATSPLLMVPPLRTIWPS
|
||||||
|
NKEWYDGDAGPSSTGPIKREASDNTNDTAHNTFAPPPEMVIPLITIRPSDDSSNYSCDAG
|
||||||
|
AGPSTGPVKRGRGRPKGSKNSTPTEPKKPKVYDPNSLKVTSRGNFDSEITEAETETGNQE
|
||||||
|
IVDSVMMRFDAVRRRLCQINHPEDILTTASGNCTKMGVKTNTRRRIGAVPGIHVGDIFYY
|
||||||
|
WGEMCLVGLHKSNYGGIDFFTAAESAVEGHAAMCVVTAGQYDGETEGLDTLIYSGQGGTD
|
||||||
|
VYGNARDQEMKGGNLALEASVSKGNDVRVVRGVIHPHENNQKIYIYDGMYLVSKFWTVTG
|
||||||
|
KSGFKEFRFKLVRKPNQPPAYAIWKTVENLRNHDLIDSRQGFILEDLSFGAELLRVPLVN
|
||||||
|
EVDEDDKTIPEDFDYIPSQCHSGMMTHEFHFDRQSLGCQNCRHQPCMHQNCTCVQRNGDL
|
||||||
|
LPYHNNILVCRKPLIYECGGSCPCPDHCPTRLVQTGLKLHLEVFKTRNCGWGLRSWDPIR
|
||||||
|
AGTFICEFAGLRKTKEEVEEDDDYLFDTSKIYQRFRWNYEPELLLEDSWEQVSEFINLPT
|
||||||
|
QVLISAKEKGNVGRFMNHSCSPNVFWQPIEYENRGDVYLLIGLFAMKHIPPMTELTYDYG
|
||||||
|
VSCVERSEEDEGFLVCPYLSSSLWPSSSEIHFLINSKGRAWYDKIYRKLASQGNVSSGLD
|
||||||
|
SVKDEPEKLREEQMEGDGFKEKLSDSVLIDEKLEEYSDCDRTATTSRSHTDPVSSQSTHQ
|
||||||
|
TPESFRTPITCDDDTFVSVSGISRDVSNLIPFATETPASPVQEKMANTRSFSNNSVKGNQ
|
||||||
|
DEFFIEDFDVGPMDTIDLYDMTFREDPSDFDDNLLYAMRDRTKQLRSFKRKIMDAIKSKR
|
||||||
|
RREKEYEQLAIWFGDADMGCDLVNDKEQSTTSIDSKSSQTNVPVVSEDSEWEIL
|
||||||
|
>seq010
|
||||||
|
MMMTQRISPSNKRRRVSFVRDFPQFSVKDESDIGGDDVATIKENLDGKEDSNCVGVAYRD
|
||||||
|
HHRPKEESFDSIMKKAGFNVANGNLGNGKFPPSKRNVPLPCEGKVQPLSVEEGIKLMAYE
|
||||||
|
SQRRRCFGKPLVSTKVVQKHRYSPAKKKLSNATALRVRHSPMKKLSNASRLRANAHRPTQ
|
||||||
|
HKDERRSGVLSVIQRNRLSKDLTPRQKVQEVLRIFTLVFDELDRNKAARRGGSETAKSRI
|
||||||
|
DYQTWTILREMGMQVNSQKRIGSVPGIKVGDKIQFKAALSVIGLHFGIMSGIDYMYKGNK
|
||||||
|
EVATSIVSSEGNDYGDRFINDVMIYCGQGGNMRSKDHKAIKDQKLVGGNLALANSIKEKT
|
||||||
|
PVRVIRGERRLDNRGKDYVYDGLYRVEKYWEERGPQGNILFKFKLRRTCQPYVDF
|
||||||
|
>seq011
|
||||||
|
MSQKRSLVFAIRDFPPGCGTHIDVSSSLNHPAEKAFKHPRTGDVSGENLSFAEAKPEGTC
|
||||||
|
LKRESADQDHIFAAPEHNAKREPAGQDHVVAATTVAYATSSHRQKVEIGNSDCDPTPREK
|
||||||
|
VLEVLSLFKQVYNQLDRDKKARRGGDFLDATSRIDLKTLTVLEKMGKQVNTEKRIGSVPG
|
||||||
|
INIGDVFQYKTELRVVGLHSKPMCGIDYIKLGDDRITTSIVASEGYGYNDTYNSGVMVYT
|
||||||
|
GEGGNVINKQKKTEDQKLVKGNLALATSMRQKSQVRVIRGEERLDRKGKRYVYDGLYMVE
|
||||||
|
EYWVERDVRGKSVYKFKLCRIPGQLPLT
|
||||||
|
>seq012
|
||||||
|
MCLVGLHRNTAGGIDSLLAKESGVDGPAATSVVTSGKYDNETEDLETLIYSGHGGKPCDQ
|
||||||
|
VLQRGNRALEASVRRRNEVRVIRGELYNNEKVYIYDGLYLVSDCWQVTGKSGFKEYRFKL
|
||||||
|
LRKPGQPPGYAIWKLVENLRNHELIDPRQGFILGDLSFGEEGLRVPLVNEVDEEDKTIPD
|
||||||
|
DFDYIRSQCYSGMTNDVNVDSQSLVQSYIHQNCTCILKNCGQLPYHDNILVCRKPLIYEC
|
||||||
|
GGSCPTRMVETGLKLHLEVFKTSNCGWGLRSWDPIRAGTFICEFTGVSKTKEEVEEDDDY
|
||||||
|
LFDTSRIYHSFRWNYEPELLCEDACEQVSEDANLPTQVLISAKEKGNVGRFMNHNCWPNV
|
||||||
|
FWQPIEYDDNNGHIYVRIGLFAMKHIPPMTELTYDYGISCVEKTGEDEVIYKGKKICLCG
|
||||||
|
SVKCRGSFG
|
||||||
|
>seq013
|
||||||
|
MGLVGLHSGTIDMEFIGVEDHGDEEGKQIAVSVISSGKNADKTEDPDSLIFTGFGGTDMY
|
||||||
|
HGQPCNQKLERLNIPLEAAFRKKSIVRVVRCMKDEKRTNGNIYIYDGTYMITNRWEEEGQ
|
||||||
|
NGFIVFKFKLVREPDQKPAFGIWKSIQNWRNGLSIRPGLILEDLSNGAENLKVCLVNEVD
|
||||||
|
KENGPALFRYVTSLIHEVINNIPSMVDRCACGRRSCGSKHVFREKLSVSSSLVISAKKSG
|
||||||
|
NVARFMNHSCSPNVFWQSIAREQNGLWCLYIGFFAMKHIPPLTELRYDYGKSRGGGKKMC
|
||||||
|
LCRTKKCCGSFG
|
||||||
|
>seq014
|
||||||
|
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||||
|
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||||
|
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||||
|
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||||
|
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRSRGRH
|
||||||
|
FANEDQEFEDLNEALRVSCEMGYPVRVVRSYKDRYSAYAPKEGVRYDGVYRIEKCWRKAR
|
||||||
|
FPVCRYLFVRCDNEPAPWNSDESGDRPRPLPNIPELETASDLFERKESPSWDFDEAEGRW
|
||||||
|
RWMKPPPANHEQRERMKMAMTCLLLFVLIILVGSSSILYQY
|
||||||
|
>seq015
|
||||||
|
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||||
|
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||||
|
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||||
|
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||||
|
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRRSYKD
|
||||||
|
RYSAYAPKEGVRYDGVYRIEKCWRKARFPDSFKVCRYLFVRCDNEPAPWNSDESGDRPRP
|
||||||
|
LPNIPELETASDLFERKESPSWDFDEAEGRWRWMKPPPANHEQRERMKMAMTCLLLFVLI
|
||||||
|
ILVGSSSILYQY
|
||||||
|
>seq016
|
||||||
|
MAEQPRINSALVSVIRMAKVSKNANSAVSAAAYHYIRNDDRPDKAFTTERAKRAGKANAS
|
||||||
|
SGQIFVTIPPDHFGPILAENDPKRSIGVLVGDTWEDRLECRQWGAHFPHVAGIAGQSTHG
|
||||||
|
AQSVALSGGYVDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSSDQKFEKLNAALRISCLKG
|
||||||
|
YPVRVVRSHKEKRSSYAPEAGVRYDGVYRIEKCWRKISVQGKFKVCRYLFVRCDNEPAPW
|
||||||
|
TSDIYGDRPRPLPKVDELKGATDISERKGTPSWDFDEKEGWKWVKPPPISRKPNLSGDPA
|
||||||
|
TDKEIRRVARRAQMSVTERLLKEFGCSICKQVMKEPLTTPCAHNFCKLCLVGTYGSQSSM
|
||||||
|
RERSRGGRTLRAQKIVKKCPSCPTDICDFLENPQINREMMDLIESLQRKAVEEGDTKTSS
|
||||||
|
DVSNGAESSGDDGNNEALEKGEDDSSLKDDGSLKDDGKVVKAVVVIKEEDLQPKKSKGED
|
||||||
|
EKEQGDKKMDSADVVDIAVEKKQATKRASEKAEKKQARKRKGDAVATNDGKRMKTGGDAM
|
||||||
|
ETAAEEDAPLSGGTPVKRNSRKSSEVDAKGGGGSPVVSSPRRVTRSNAKASGEADGSPAT
|
||||||
|
RTRRATRAEA
|
||||||
|
>seq017
|
||||||
|
MTPATQYPCDPEGVCMRCKSMPPPEESLTCGTCVTPWHVSCLLSPPETLSATLQWLCPDC
|
||||||
|
SGETNPLPVSGVAAGYGSVGSDLVAAIHSIEADETLSAEEKAKKKQQLLSGKGVVDEDDE
|
||||||
|
EEKKKTSKGKKPIDVLSHFECSFCMQSLQKPVSVRVLFALALMLVWFLESTPCGHNACLK
|
||||||
|
CFLKWMGQGHRSCGTCRSVIPESMVTNPRINLSIVSAIRLARVSEKADARTSKVVHYVDN
|
||||||
|
EDRPDKAFTTERAKKTGNANASSGKIFVTIPRDHFGPIPAENDPVRNQGLLVGESWKGRL
|
||||||
|
ACRQWGAHFPHVSGIAGQASYGAQSVVLAGGYDDDEDHGEWFLYTGRTNTVQAFDQVFLN
|
||||||
|
FNEALRLSCKLGYPVRVVRSTKDKRSPYAPQGGLLRYDGVYRIEKCWRIVGIQMCRFLFV
|
||||||
|
RCDNEPAPWTSDEHGDRPRPLPNVPELNMATDLFERKESPSWDFDEGEDRWRWMKPPPAS
|
||||||
|
KKAVKNVLDPEERKLLREAIKSANPNTMRARLLKEFKCQICQKVMTNPVTTPCAHNFCKA
|
||||||
|
CLESKFAGTALVRERGSGGRKLRSQKSVMKCPCCPTDIAEFVQNPQVNREVAEVIEKLKK
|
||||||
|
QEEEENAKSLDEGQCSGTSHEEEDDEQPKKRIKLDTDAEVSATVVESDMK
|
||||||
|
>seq018
|
||||||
|
MARDIQLPCDGDGVCMRCKSNPPPEESLTCGTCVTPWHVSCLSSPPKTLASTLQWHCPDC
|
||||||
|
SGEIDPLPVSGGATGFESAGSDLVAAIRAIEADESLSTEEKAKMRQRLLSGKGVEEDDEE
|
||||||
|
EKRKKKGKGKNPNLDVLSALGDNLMCSFCMQLPERPVTKPCGHNACLKCFEKWMGQGKRT
|
||||||
|
CGKCRSIIPEKMAKNPRINSSLVAAIRLAKVSKSAAATTSKVFHFISNQDRPDKAFTTER
|
||||||
|
AKKTGKANAASGKIYVTIPPDHFGPIPAENDPVRNQGLLVGESWEDRLECRQWGAHFPHV
|
||||||
|
AGIAGQSTYGAQSVALSGGYKDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSFDQKFEKSN
|
||||||
|
AALKLSCKLGYPVRVVRSHKEKRSAYAPEEGVRYDGVYRIEKCWRKVGVQVCRYLFVRCD
|
||||||
|
NEPAPWTSDENGDRPRPIPNIPELNMATDLFERKETPSWDFDEGEGCWKWMKPPPASKKS
|
||||||
|
VNVLAPEERKNLRKAIKAAHSNTMRARLLKEFKCQICQQVLTLPVTTPCAHNFCKACLEA
|
||||||
|
KFAGKTLVRERSTGGRTLRSRKNVLNCPCCPTDISDFLQNPQVNREVAEVIEKLKTQEED
|
||||||
|
TAELEDEDEGECSGTTPEEDSEQPKKRIKLDTDATVSATIR
|
||||||
|
>seq019
|
||||||
|
MAIQTQLPCDGDGVCMRCQVTPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||||
|
GVVVPSAAPGTGISGPESSGSVLVAAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||||
|
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGKFF
|
||||||
|
VTIPRDHFGPIPAANDVTRNQGVLVGESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVA
|
||||||
|
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRV
|
||||||
|
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEH
|
||||||
|
GDRPRPLPDVPELENATDLFVRKESPSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKR
|
||||||
|
AKKGNNAMKARLLKEFSCQICRKVLSLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVR
|
||||||
|
KLRAKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEAEVAESSNISEEEEEE
|
||||||
|
SEPPTKKIKMDNNSVGDTSLSA
|
||||||
|
>seq020
|
||||||
|
MAIQTQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||||
|
GVVVPSAAPGTGISGPESSGSVLVTAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||||
|
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGVLV
|
||||||
|
GESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVALSGGYDDDEDHGEWFLYTGSGGRDL
|
||||||
|
SGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRVVRSWKEKRSAYAPAEGVRYDGVYRI
|
||||||
|
EKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEHGDRPRPLPDVPELENATDLFVRKES
|
||||||
|
PSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKRAKKGNNAMKARLLKEFSCQICRKVL
|
||||||
|
SLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVRKLRAKKNIMTCPCCTTDLSEFLQNP
|
||||||
|
QVNREMMEIIENFKKSEEEAEVAESSNISEEEGEEESEPPTKKIKMDKNSVGGTSLSA
|
||||||
|
>seq021
|
||||||
|
MAIETQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVPCLLPESLASSTGEWECPDCS
|
||||||
|
GVVVPSAAPGTGNARPESSGSVLVAAIRAIQADETLTEAEKAKKRQKLMSGGGDDGVDEE
|
||||||
|
EKKKLEIFCSICIQLPERPITTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSVEATAAKVHHIIRNQDRPEKAFTTERAVKTGKANAASGKFF
|
||||||
|
VTIPRDHFGPIPAENDVTRKQGVLVGESWEDRQECRQWGAHFPHIAGIAGQSAVGAQSVA
|
||||||
|
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRINKKQSSDQAFKNMNESLRLSCKMGYPVRV
|
||||||
|
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGSFKVCRYLFVRCDNEPAPWTSDEH
|
||||||
|
GDRPRPLPNVPELETAADLFVRKESPSWDFDEAEGRWKWMKSPPVSRMALDPEERKKNKR
|
||||||
|
AKNTMKARLLKEFSCQICREVLSLPVTTPCAHNFCKACLEAKFAGITQLRERSNGGRKLR
|
||||||
|
AKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEADASISEEEEEESEPPTKK
|
||||||
|
IKMDNNSVGGSGTSLSA
|
||||||
|
>seq022
|
||||||
|
MWIQVRTMDGRQTHTVDSLSRLTKVEELRRKIQELFHVEPGLQRLFYRGKQMEDGHTLFD
|
||||||
|
YEVRLNDTIQLLVRQSLVLPHSTKERDSELSDTDSGCCLGQSESDKSSTHGEAAAETDSR
|
||||||
|
PADEDMWDETELGLYKVNEYVDARDTNMGAWFEAQVVRVTRKAPSRDEPCSSTSRPALEE
|
||||||
|
DVIYHVKYDDYPENGVVQMNSRDVRARARTIIKWQDLEVGQVVMLNYNPDNPKERGFWYD
|
||||||
|
AEISRKRETRTARELYANVVLGDDSLNDCRIIFVDEVFKIERPGEGSPMVDNPMRRKSGP
|
||||||
|
SCKHCKDDVNRLCRVCACHLCGGRQDPDKQLMCDECDMAFHIYCLDPPLSSVPSEDEWYC
|
||||||
|
PECRNDASEVVLAGERLRESKKNAKMASATSSSQRDWGKGMACVGRTKECTIVPSNHYGP
|
||||||
|
IPGIPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGSYSLVLAGGYEDDVDHGNFFTYTG
|
||||||
|
SGGRDLSGNKRTAEQSCDQKLTNTNRALALNCFAPINDQEGAEAKDWRSGKPVRVVRNVK
|
||||||
|
GGKNSKYAPAEGNRYDGIYKVVKYWPEKGKSGFLVWRYLLRRDDDEPGPWTKEGKDRIKK
|
||||||
|
LGLTMQYPEGYLEALANREREKENSKREEEEQQEGGFASPRTGKGKWKRKSAGGGPSRAG
|
||||||
|
SPRRTSKKTKVEPYSLTAQQSSLIREDKSNAKLWNEVLASLKDRPASGSPFQLFLSKVEE
|
||||||
|
TFQCICCQELVFRPITTVCQHNVCKDCLDRSFRAQVFSCPACRYDLGRSYAMQVNQPLQT
|
||||||
|
VLNQLFPGYGNGR
|
||||||
|
>seq023
|
||||||
|
MWIQVRTMDGKETHTVNSLSRLTKVQELRKKIEEVFHVEPQLQRLFYRGKQMEDGHTLFD
|
||||||
|
YDVRLNDTIQLLVRQSLALPLSTKERDSELSDSDSGYGVGHSESDKSSTHGEGAAEADDK
|
||||||
|
TVWEDTDLGLYKVNEYVDVRDNIFGAWFEAQVVQVQKRALSEDEPCSSSAVKTSEDDIMY
|
||||||
|
HVKYDDYPEHGVDIVKAKNVRARARTVIPWENLEVGQVVMANYNVDYPRKRGFWYDVEIC
|
||||||
|
RKRQTRTARELYGNIRLLNDSQLNNCRIMFVDEVLMIELPKERRPLIASPSQPPPALRNT
|
||||||
|
GKSGPSCRFCKDDENKPCRKCACHVCGGREAPEKQLLCDECDMAFHLYCLKPPLTSVPPE
|
||||||
|
PEWYCPSCRTDSSEVVQAGEKLKESKKKAKMASATSSSRRDWGKGMACVGRTTECTIVPA
|
||||||
|
NHFGPIPGVPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGAYSLVLAGGYEDDVDNGNY
|
||||||
|
FTYTGSGGRDLSGNKRTAGQSSDQKLTNNNRALALNCHSPINEKGAEAEDWRQGKPVRVV
|
||||||
|
RNMKGGKHSKYAPAEGNRYDGIYKVVKYWPERGKSGFLVWRYLLRRDDTEPEPWTREGKD
|
||||||
|
RTRQLGLTMQYPEGYLEALANKEKSRKRPAKALEQGPSSSKTGKSKQKSTGPTLSSPRAS
|
||||||
|
KKSKLEPYTLSEQQANLIKEDKGNAKLWDDVLTSLQDGPYQIFLSKVKEAFQCICCQELV
|
||||||
|
FRPVTTVCQHNVCKDCLDRSFRAQVFSCPACRFELDHSSPTRVNQPLQTILNQLFPGYGS
|
||||||
|
GR
|
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
Binary file not shown.
61
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
61
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
### E. coli paired-end test dataset for Bisulfite-seq applications
|
||||||
|
|
||||||
|
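The E. coli data set was generated using [Sherman](https://github.com/FelixKrueger/Sherman) as 10,000 reads of paired-end data, with average methylation levels of 80% in CpG context, and 10% in non-CG context (Sherman's `--CG` and `--CH` options are bisulfite conversion rates, so `--CG 20 --CH 90` in the command below corresponds to 80% and 10% methylation). The files can be found in the folder `tests/data/fastq/methylated_dna`, and are called: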
|
||||||
|
|
||||||
|
```
|
||||||
|
Ecoli_10K_methylated_R1.fastq.gz
|
||||||
|
Ecoli_10K_methylated_R2.fastq.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
Sherman --non_dir --genome /bi/scratch/Genomes/E_coli/ --paired -n 10000 -l 100 --CG 20 --CH 90
|
||||||
|
```
|
||||||
|
|
||||||
|
The data is non-directional, so it should produce roughly 25% mapping to each of the `OT`, `CTOT`, `CTOB` and `OB` strands. Thus, the data can be used for bisulfite mapping in standard (= directional), `--pbat` and `--non_directional` mode.
|
||||||
|
|
||||||
|
A test alignment should look roughly like this:
|
||||||
|
|
||||||
|
`bismark --genome /bi/scratch/Genomes/E_coli/ -1 Ecoli_10K_methylated_R1.fastq.gz -2 Ecoli_10K_methylated_R2.fastq.gz --non_dir`
|
||||||
|
|
||||||
|
```
|
||||||
|
Bismark report for: Ecoli_10K_methylated_R1.fastq.gz and Ecoli_10K_methylated_R2.fastq.gz (version: v0.22.3)
|
||||||
|
Bismark was run with Bowtie 2 against the bisulfite genome of /bi/scratch/Genomes/E_coli/ with the specified options: -q --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --maxins 500
|
||||||
|
Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)
|
||||||
|
|
||||||
|
Final Alignment report
|
||||||
|
======================
|
||||||
|
Sequence pairs analysed in total: 10000
|
||||||
|
Number of paired-end alignments with a unique best hit: 9320
|
||||||
|
Mapping efficiency: 93.2%
|
||||||
|
Sequence pairs with no alignments under any condition: 0
|
||||||
|
Sequence pairs did not map uniquely: 680
|
||||||
|
Sequence pairs which were discarded because genomic sequence could not be extracted: 0
|
||||||
|
|
||||||
|
Number of sequence pairs with unique best (first) alignment came from the bowtie output:
|
||||||
|
CT/GA/CT: 2341 ((converted) top strand)
|
||||||
|
GA/CT/CT: 2329 (complementary to (converted) top strand)
|
||||||
|
GA/CT/GA: 2356 (complementary to (converted) bottom strand)
|
||||||
|
CT/GA/GA: 2294 ((converted) bottom strand)
|
||||||
|
|
||||||
|
Final Cytosine Methylation Report
|
||||||
|
=================================
|
||||||
|
Total number of C's analysed: 471997
|
||||||
|
|
||||||
|
Total methylated C's in CpG context: 111011
|
||||||
|
Total methylated C's in CHG context: 11923
|
||||||
|
Total methylated C's in CHH context: 21433
|
||||||
|
Total methylated C's in Unknown context: 0
|
||||||
|
|
||||||
|
Total unmethylated C's in CpG context: 27790
|
||||||
|
Total unmethylated C's in CHG context: 105877
|
||||||
|
Total unmethylated C's in CHH context: 193963
|
||||||
|
Total unmethylated C's in Unknown context: 0
|
||||||
|
|
||||||
|
C methylated in CpG context: 80.0%
|
||||||
|
C methylated in CHG context: 10.1%
|
||||||
|
C methylated in CHH context: 10.0%
|
||||||
|
Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
|
||||||
|
|
||||||
|
|
||||||
|
Bismark completed in 0d 0h 0m 12s
|
||||||
|
```
|
BIN
tests/data/fastq/rna/SRR4238351_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238351_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238355_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238355_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238359_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238359_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238379_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238379_subsamp.fastq.gz
Normal file
Binary file not shown.
8
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
8
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
### Paired-end RNA-seq test dataset
|
||||||
|
|
||||||
|
The data here are 2 x 76 bp RNA-seq data from mouse (10,000 reads of paired-end data). The files can be found in the folder `tests/data/fastq/rna`, and are called:
|
||||||
|
|
||||||
|
```
|
||||||
|
test_R1.fastq.gz
|
||||||
|
test_R2.fastq.gz
|
||||||
|
```
|
Binary file not shown.
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue