mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-10 20:23:10 +00:00
Adding back in all input files we had before
This commit is contained in:
parent
45cc9ede3e
commit
1c84a234ec
18 changed files with 67280 additions and 0 deletions
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
File diff suppressed because it is too large
Load diff
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
|
@ -0,0 +1,265 @@
|
|||
>seq001
|
||||
MAGKRKRANAPDQTERRSSVRVQKVRQKALDEKARLVQERVKLLSDRKSEICVDDTELHE
|
||||
KEEENVDGSPKRRSPPKLTAMQKGKQKLSVSLNGKDVNLEPHLKVTKCLRLFNKQYLLCV
|
||||
QAKLSRPDLKGVTEMIKAKAILYPRKIIGDLPGIDVGHRFFSRAEMCAVGFHNHWLNGID
|
||||
YMSMEYEKEYSNYKLPLAVSIVMSGQYEDDLDNADTVTYTGQGGHNLTGNKRQIKDQLLE
|
||||
RGNLALKHCCEYNVPVRVTRGHNCKSSYTKRVYTYDGLYKVEKFWAQKGVSGFTVYKYRL
|
||||
KRLEGQPELTTDQVNFVAGRIPTSTSEIEGLVCEDISGGLEFKGIPATNRVDDSPVSPTS
|
||||
GFTYIKSLIIEPNVIIPKSSTGCNCRGSCTDSKKCACAKLNGGNFPYVDLNDGRLIESRD
|
||||
VVFECGPHCGCGPKCVNRTSQKRLRFNLEVFRSAKKGWAVRSWEYIPAGSPVCEYIGVVR
|
||||
RTADVDTISDNEYIFEIDCQQTMQGLGGRQRRLRDVAVPMNNGVSQSSEDENAPEFCIDA
|
||||
GSTGNFARFINHSCEPNLFVQCVLSSHQDIRLARVVLFAADNISPMQELTYDYGYALDSV
|
||||
HGPDGKVKQLACYCGALNCRKRLY
|
||||
>seq002
|
||||
MPRHFGAVPGVVPGMAFVNRQELRDAGVHLPTQAGISGSASEGADSIVLSGGYEDDRDEG
|
||||
DVILYTGEGGRDPLTGHQVKPQQLVRGNLALAISHRDGLPLRVTRGHRHSSQFSPQSGYQ
|
||||
YAGLYRVDDHWREVGRSGFLIWRFRLTRLENQDAHHAGADPQHPDSQHPERRPTLVQRIV
|
||||
RDTATARAVKALYDHRCQVCGERLETPAGAYAEAAHIRPLGAPHHGPDVAGNILCLCPNH
|
||||
HVLFDFGAFSVGDDLRLLGLPGRLHVHPQHAVDREHLAYHRRHYALQAGLEWGCSVPLT
|
||||
>seq003
|
||||
MGVMENLMVHTEISKVKSQSNGEVEKRGVSVLENGGVCKLDRMSGLKFKRRKVFAVRDFP
|
||||
PGCGSRAMEVKIACENGNVVEDVKVVESLVKEEESLGQRDASENVSDIRMAEPVEVQPLR
|
||||
ICLPGGDVVRDLSVTAGDECSNSEQIVAGSGVSSSSGTENIVRDIVVYADESSLGMDNLD
|
||||
QTQPLEIEMSDVAVAKPRLVAGRKKAKKGIACHSSLKVVSREFGEGSRKKKSKKNLYWRD
|
||||
RESLDSPEQLRILGVGTSSGSSSGDSSRNKVKETLRLFHGVCRKILQEDEAKPEDQRRKG
|
||||
KGLRIDFEASTILKRNGKFLNSGVHILGEVPGVEVGDEFQYRMELNILGIHKPSQAGIDY
|
||||
MKYGKAKVATSIVASGGYDDHLDNSDVLTYTGQGGNVMQVKKKGEELKEPEDQKLITGNL
|
||||
ALATSIEKQTPVRVIRGKHKSTHDKSKGGNYVYDGLYLVEKYWQQVGSHGMNVFKFQLRR
|
||||
IPGQPELSWVEVKKSKSKYREGLCKLDISEGKEQSPISAVNEIDDEKPPLFTYTVKLIYP
|
||||
DWCRPVPPKSCCCTTRCTEAEARVCACVEKNGGEIPYNFDGAIVGAKPTIYECGPLCKCP
|
||||
SSCYLRVTQHGIKLPLEIFKTKSRGWGVRCLKSIPIGSFICEYVGELLEDSEAERRIGND
|
||||
EYLFDIGNRYDNSLAQGMSELMLGTQAGRSMAEGDESSGFTIDAASKGNVGRFINHSCSP
|
||||
NLYAQNVLYDHEDSRIPHVMFFAQDNIPPLQELCYDYNYALDQVRDSKGNIKQKPCFCGA
|
||||
AVCRRRLY
|
||||
>seq004
|
||||
MSTLLPFPDLNLMPDSQSSTAGTTAGDTVVTGKLEVKSEPIEEWQTPPSSTSDQSANTDL
|
||||
IAEFIRISELFRSAFKPLQVKGLDGVSVYGLDSGAIVAVPEKENRELIEPPPGFKDNRVS
|
||||
TVVVSPKFERPRELARIAILGHEQRKELRQVMKRTRMTYESLRIHLMAESMKNHVLGQGR
|
||||
RRRSDMAAAYIMRDRGLWLNYDKHIVGPVTGVEVGDIFFYRMELCVLGLHGQTQAGIDCL
|
||||
TAERSATGEPIATSIVVSGGYEDDEDTGDVLVYTGHGGQDHQHKQCDNQRLVGGNLGMER
|
||||
SMHYGIEVRVIRGIKYENSISSKVYVYDGLYKIVDWWFAVGKSGFGVFKFRLVRIEGQPM
|
||||
MGSAVMRFAQTLRNKPSMVRPTGYVSFDLSNKKENVPVFLYNDVDGDQEPRHYEYIAKAV
|
||||
FPPGIFGQGGISRTGCECKLSCTDDCLCARKNGGEFAYDDNGHLLKGKHVVFECGEFCTC
|
||||
GPSCKSRVTQKGLRNRLEVFRSKETGWGVRTLDLIEAGAFICEYAGVVVTRLQAEILSMN
|
||||
GDVMVYPGRFTDQWRNWGDLSQVYPDFVRPNYPSLPPLDFSMDVSRMRNVACYISHSKEP
|
||||
NVMVQFVLHDHNHLMFPRVMLFALENISPLAELSLDYGLADEVNGKLAICN
|
||||
>seq005
|
||||
MVHSESSILSSLRGGDGGGIPCSKDELAINGSYTDPMGRRKSKRFKVAAESEFSPDFGSI
|
||||
TRQLRSRRMQKEFTVETYETRNVSDVCVLSSQADVELIPGEIVAERDSFKSVDCNDMSVG
|
||||
LTEGAESLGVNMQEPMKDRNMPENTSEQNMVEVHPPSISLPEEDMMGSVCRKSITGTKEL
|
||||
HGRTISVGRDLSPNMGSKFSKNGKTAKRSISVEEENLVLEKSDSGDHLGPSPEVLELEKS
|
||||
EVWIITDKGVVMPSPVKPSEKRNGDYGEGSMRKNSERVALDKKRLASKFRLSNGGLPSCS
|
||||
SSGDSARYKVKETMRLFHETCKKIMQEEEARPRKRDGGNFKVVCEASKILKSKGKNLYSG
|
||||
TQIIGTVPGVEVGDEFQYRMELNLLGIHRPSQSGIDYMKDDGGELVATSIVSSGGYNDVL
|
||||
DNSDVLIYTGQGGNVGKKKNNEPPKDQQLVTGNLALKNSINKKNPVRVIRGIKNTTLQSS
|
||||
VVAKNYVYDGLYLVEEYWEETGSHGKLVFKFKLRRIPGQPELPWKEVAKSKKSEFRDGLC
|
||||
NVDITEGKETLPICAVNNLDDEKPPPFIYTAKMIYPDWCRPIPPKSCGCTNGCSKSKNCA
|
||||
CIVKNGGKIPYYDGAIVEIKPLVYECGPHCKCPPSCNMRVSQHGIKIKLEIFKTESRGWG
|
||||
VRSLESIPIGSFICEYAGELLEDKQAESLTGKDEYLFDLGDEDDPFTINAAQKGNIGRFI
|
||||
NHSCSPNLYAQDVLYDHEEIRIPHIMFFALDNIPPLQELSYDYNYKIDQVYDSNGNIKKK
|
||||
FCYCGSAECSGRLY
|
||||
>seq006
|
||||
MERNGGHYTDKTRVLDIKPLRTLRPVFPSGNQAPPFVCAPPFGPFPPGFSSFYPFSSSQA
|
||||
NQHTPDLNQAQYPPQHQQPQNPPPVYQQQPPQHASEPSLVTPLRSFRSPDVSNGNAELEG
|
||||
STVKRRIPKKRPISRPENMNFESGINVADRENGNRELVLSVLMRFDALRRRFAQLEDAKE
|
||||
AVSGIIKRPDLKSGSTCMGRGVRTNTKKRPGIVPGVEIGDVFFFRFEMCLVGLHSPSMAG
|
||||
IDYLVVKGETEEEPIATSIVSSGYYDNDEGNPDVLIYTGQGGNADKDKQSSDQKLERGNL
|
||||
ALEKSLRRDSAVRVIRGLKEASHNAKIYIYDGLYEIKESWVEKGKSGHNTFKYKLVRAPG
|
||||
QPPAFASWTAIQKWKTGVPSRQGLILPDMTSGVESIPVSLVNEVDTDNGPAYFTYSTTVK
|
||||
YSESFKLMQPSFGCDCANLCKPGNLDCHCIRKNGGDFPYTGNGILVSRKPMIYECSPSCP
|
||||
CSTCKNKVTQMGVKVRLEVFKTANRGWGLRSWDAIRAGSFICIYVGEAKDKSKVQQTMAN
|
||||
DDYTFDTTNVYNPFKWNYEPGLADEDACEEMSEESEIPLPLIISAKNVGNVARFMNHSCS
|
||||
PNVFWQPVSYENNSQLFVHVAFFAISHIPPMTELTYDYGVSRPSGTQNGNPLYGKRKCFC
|
||||
GSAYCRGSFG
|
||||
>seq007
|
||||
MQGVPGFNTVPNPNHYDKSIVLDIKPLRSLKPVFPNGNQGPPFVGCPPFGPSSSEYSSFF
|
||||
PFGAQQPTHDTPDLNQTQNTPIPSFVPPLRSYRTPTKTNGPSSSSGTKRGVGRPKGTTSV
|
||||
KKKEKKTVANEPNLDVQVVKKFSSDFDSGISAAEREDGNAYLVSSVLMRFDAVRRRLSQV
|
||||
EFTKSATSKAAGTLMSNGVRTNMKKRVGTVPGIEVGDIFFSRIEMCLVGLHMQTMAGIDY
|
||||
IISKAGSDEESLATSIVSSGRYEGEAQDPESLIYSGQGGNADKNRQASDQKLERGNLALE
|
||||
NSLRKGNGVRVVRGEEDAASKTGKIYIYDGLYSISESWVEKGKSGCNTFKYKLVRQPGQP
|
||||
PAFGFWKSVQKWKEGLTTRPGLILPDLTSGAESKPVSLVNDVDEDKGPAYFTYTSSLKYS
|
||||
ETFKLTQPVIGCSCSGSCSPGNHNCSCIRKNDGDLPYLNGVILVSRRPVIYECGPTCPCH
|
||||
ASCKNRVIQTGLKSRLEVFKTRNRGWGLRSWDSLRAGSFICEYAGEVKDNGNLRGNQEED
|
||||
AYVFDTSRVFNSFKWNYEPELVDEDPSTEVPEEFNLPSPLLISAKKFGNVARFMNHSCSP
|
||||
NVFWQPVIREGNGESVIHIAFFAMRHIPPMAELTYDYGISPTSEARDESLLHGQRTCLCG
|
||||
SEQCRGSFG
|
||||
>seq008
|
||||
MGSSHIPLDPSLNPSPSLIPKLEPVTESTQNLAFQLPNTNPQALISSAVSDFNEATDFSS
|
||||
DYNTVAESARSAFAQRLQRHDDVAVLDSLTGAIVPVEENPEPEPNPYSTSDSSPSVATQR
|
||||
PRPQPRSSELVRITDVGPESERQFREHVRKTRMIYDSLRMFLMMEEAKRNGVGGRRARAD
|
||||
GKAGKAGSMMRDCMLWMNRDKRIVGSIPGVQVGDIFFFRFELCVMGLHGHPQSGIDFLTG
|
||||
SLSSNGEPIATSVIVSGGYEDDDDQGDVIMYTGQGGQDRLGRQAEHQRLEGGNLAMERSM
|
||||
YYGIEVRVIRGLKYENEVSSRVYVYDGLFRIVDSWFDVGKSGFGVFKYRLERIEGQAEMG
|
||||
SSVLKFARTLKTNPLSVRPRGYINFDISNGKENVPVYLFNDIDSDQEPLYYEYLAQTSFP
|
||||
PGLFVQQSGNASGCDCVNGCGSGCLCEAKNSGEIAYDYNGTLIRQKPLIHECGSACQCPP
|
||||
SCRNRVTQKGLRNRLEVFRSLETGWGVRSLDVLHAGAFICEYAGVALTREQANILTMNGD
|
||||
TLVYPARFSSARWEDWGDLSQVLADFERPSYPDIPPVDFAMDVSKMRNVACYISHSTDPN
|
||||
VIVQFVLHDHNSLMFPRVMLFAAENIPPMTELSLDYGVVDDWNAKLAICN
|
||||
>seq009
|
||||
MDKSIPIKAIPVACVRPDLVDDVTKNTSTIPTMVSPVLTNMPSATSPLLMVPPLRTIWPS
|
||||
NKEWYDGDAGPSSTGPIKREASDNTNDTAHNTFAPPPEMVIPLITIRPSDDSSNYSCDAG
|
||||
AGPSTGPVKRGRGRPKGSKNSTPTEPKKPKVYDPNSLKVTSRGNFDSEITEAETETGNQE
|
||||
IVDSVMMRFDAVRRRLCQINHPEDILTTASGNCTKMGVKTNTRRRIGAVPGIHVGDIFYY
|
||||
WGEMCLVGLHKSNYGGIDFFTAAESAVEGHAAMCVVTAGQYDGETEGLDTLIYSGQGGTD
|
||||
VYGNARDQEMKGGNLALEASVSKGNDVRVVRGVIHPHENNQKIYIYDGMYLVSKFWTVTG
|
||||
KSGFKEFRFKLVRKPNQPPAYAIWKTVENLRNHDLIDSRQGFILEDLSFGAELLRVPLVN
|
||||
EVDEDDKTIPEDFDYIPSQCHSGMMTHEFHFDRQSLGCQNCRHQPCMHQNCTCVQRNGDL
|
||||
LPYHNNILVCRKPLIYECGGSCPCPDHCPTRLVQTGLKLHLEVFKTRNCGWGLRSWDPIR
|
||||
AGTFICEFAGLRKTKEEVEEDDDYLFDTSKIYQRFRWNYEPELLLEDSWEQVSEFINLPT
|
||||
QVLISAKEKGNVGRFMNHSCSPNVFWQPIEYENRGDVYLLIGLFAMKHIPPMTELTYDYG
|
||||
VSCVERSEEDEGFLVCPYLSSSLWPSSSEIHFLINSKGRAWYDKIYRKLASQGNVSSGLD
|
||||
SVKDEPEKLREEQMEGDGFKEKLSDSVLIDEKLEEYSDCDRTATTSRSHTDPVSSQSTHQ
|
||||
TPESFRTPITCDDDTFVSVSGISRDVSNLIPFATETPASPVQEKMANTRSFSNNSVKGNQ
|
||||
DEFFIEDFDVGPMDTIDLYDMTFREDPSDFDDNLLYAMRDRTKQLRSFKRKIMDAIKSKR
|
||||
RREKEYEQLAIWFGDADMGCDLVNDKEQSTTSIDSKSSQTNVPVVSEDSEWEIL
|
||||
>seq010
|
||||
MMMTQRISPSNKRRRVSFVRDFPQFSVKDESDIGGDDVATIKENLDGKEDSNCVGVAYRD
|
||||
HHRPKEESFDSIMKKAGFNVANGNLGNGKFPPSKRNVPLPCEGKVQPLSVEEGIKLMAYE
|
||||
SQRRRCFGKPLVSTKVVQKHRYSPAKKKLSNATALRVRHSPMKKLSNASRLRANAHRPTQ
|
||||
HKDERRSGVLSVIQRNRLSKDLTPRQKVQEVLRIFTLVFDELDRNKAARRGGSETAKSRI
|
||||
DYQTWTILREMGMQVNSQKRIGSVPGIKVGDKIQFKAALSVIGLHFGIMSGIDYMYKGNK
|
||||
EVATSIVSSEGNDYGDRFINDVMIYCGQGGNMRSKDHKAIKDQKLVGGNLALANSIKEKT
|
||||
PVRVIRGERRLDNRGKDYVYDGLYRVEKYWEERGPQGNILFKFKLRRTCQPYVDF
|
||||
>seq011
|
||||
MSQKRSLVFAIRDFPPGCGTHIDVSSSLNHPAEKAFKHPRTGDVSGENLSFAEAKPEGTC
|
||||
LKRESADQDHIFAAPEHNAKREPAGQDHVVAATTVAYATSSHRQKVEIGNSDCDPTPREK
|
||||
VLEVLSLFKQVYNQLDRDKKARRGGDFLDATSRIDLKTLTVLEKMGKQVNTEKRIGSVPG
|
||||
INIGDVFQYKTELRVVGLHSKPMCGIDYIKLGDDRITTSIVASEGYGYNDTYNSGVMVYT
|
||||
GEGGNVINKQKKTEDQKLVKGNLALATSMRQKSQVRVIRGEERLDRKGKRYVYDGLYMVE
|
||||
EYWVERDVRGKSVYKFKLCRIPGQLPLT
|
||||
>seq012
|
||||
MCLVGLHRNTAGGIDSLLAKESGVDGPAATSVVTSGKYDNETEDLETLIYSGHGGKPCDQ
|
||||
VLQRGNRALEASVRRRNEVRVIRGELYNNEKVYIYDGLYLVSDCWQVTGKSGFKEYRFKL
|
||||
LRKPGQPPGYAIWKLVENLRNHELIDPRQGFILGDLSFGEEGLRVPLVNEVDEEDKTIPD
|
||||
DFDYIRSQCYSGMTNDVNVDSQSLVQSYIHQNCTCILKNCGQLPYHDNILVCRKPLIYEC
|
||||
GGSCPTRMVETGLKLHLEVFKTSNCGWGLRSWDPIRAGTFICEFTGVSKTKEEVEEDDDY
|
||||
LFDTSRIYHSFRWNYEPELLCEDACEQVSEDANLPTQVLISAKEKGNVGRFMNHNCWPNV
|
||||
FWQPIEYDDNNGHIYVRIGLFAMKHIPPMTELTYDYGISCVEKTGEDEVIYKGKKICLCG
|
||||
SVKCRGSFG
|
||||
>seq013
|
||||
MGLVGLHSGTIDMEFIGVEDHGDEEGKQIAVSVISSGKNADKTEDPDSLIFTGFGGTDMY
|
||||
HGQPCNQKLERLNIPLEAAFRKKSIVRVVRCMKDEKRTNGNIYIYDGTYMITNRWEEEGQ
|
||||
NGFIVFKFKLVREPDQKPAFGIWKSIQNWRNGLSIRPGLILEDLSNGAENLKVCLVNEVD
|
||||
KENGPALFRYVTSLIHEVINNIPSMVDRCACGRRSCGSKHVFREKLSVSSSLVISAKKSG
|
||||
NVARFMNHSCSPNVFWQSIAREQNGLWCLYIGFFAMKHIPPLTELRYDYGKSRGGGKKMC
|
||||
LCRTKKCCGSFG
|
||||
>seq014
|
||||
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRSRGRH
|
||||
FANEDQEFEDLNEALRVSCEMGYPVRVVRSYKDRYSAYAPKEGVRYDGVYRIEKCWRKAR
|
||||
FPVCRYLFVRCDNEPAPWNSDESGDRPRPLPNIPELETASDLFERKESPSWDFDEAEGRW
|
||||
RWMKPPPANHEQRERMKMAMTCLLLFVLIILVGSSSILYQY
|
||||
>seq015
|
||||
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRRSYKD
|
||||
RYSAYAPKEGVRYDGVYRIEKCWRKARFPDSFKVCRYLFVRCDNEPAPWNSDESGDRPRP
|
||||
LPNIPELETASDLFERKESPSWDFDEAEGRWRWMKPPPANHEQRERMKMAMTCLLLFVLI
|
||||
ILVGSSSILYQY
|
||||
>seq016
|
||||
MAEQPRINSALVSVIRMAKVSKNANSAVSAAAYHYIRNDDRPDKAFTTERAKRAGKANAS
|
||||
SGQIFVTIPPDHFGPILAENDPKRSIGVLVGDTWEDRLECRQWGAHFPHVAGIAGQSTHG
|
||||
AQSVALSGGYVDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSSDQKFEKLNAALRISCLKG
|
||||
YPVRVVRSHKEKRSSYAPEAGVRYDGVYRIEKCWRKISVQGKFKVCRYLFVRCDNEPAPW
|
||||
TSDIYGDRPRPLPKVDELKGATDISERKGTPSWDFDEKEGWKWVKPPPISRKPNLSGDPA
|
||||
TDKEIRRVARRAQMSVTERLLKEFGCSICKQVMKEPLTTPCAHNFCKLCLVGTYGSQSSM
|
||||
RERSRGGRTLRAQKIVKKCPSCPTDICDFLENPQINREMMDLIESLQRKAVEEGDTKTSS
|
||||
DVSNGAESSGDDGNNEALEKGEDDSSLKDDGSLKDDGKVVKAVVVIKEEDLQPKKSKGED
|
||||
EKEQGDKKMDSADVVDIAVEKKQATKRASEKAEKKQARKRKGDAVATNDGKRMKTGGDAM
|
||||
ETAAEEDAPLSGGTPVKRNSRKSSEVDAKGGGGSPVVSSPRRVTRSNAKASGEADGSPAT
|
||||
RTRRATRAEA
|
||||
>seq017
|
||||
MTPATQYPCDPEGVCMRCKSMPPPEESLTCGTCVTPWHVSCLLSPPETLSATLQWLCPDC
|
||||
SGETNPLPVSGVAAGYGSVGSDLVAAIHSIEADETLSAEEKAKKKQQLLSGKGVVDEDDE
|
||||
EEKKKTSKGKKPIDVLSHFECSFCMQSLQKPVSVRVLFALALMLVWFLESTPCGHNACLK
|
||||
CFLKWMGQGHRSCGTCRSVIPESMVTNPRINLSIVSAIRLARVSEKADARTSKVVHYVDN
|
||||
EDRPDKAFTTERAKKTGNANASSGKIFVTIPRDHFGPIPAENDPVRNQGLLVGESWKGRL
|
||||
ACRQWGAHFPHVSGIAGQASYGAQSVVLAGGYDDDEDHGEWFLYTGRTNTVQAFDQVFLN
|
||||
FNEALRLSCKLGYPVRVVRSTKDKRSPYAPQGGLLRYDGVYRIEKCWRIVGIQMCRFLFV
|
||||
RCDNEPAPWTSDEHGDRPRPLPNVPELNMATDLFERKESPSWDFDEGEDRWRWMKPPPAS
|
||||
KKAVKNVLDPEERKLLREAIKSANPNTMRARLLKEFKCQICQKVMTNPVTTPCAHNFCKA
|
||||
CLESKFAGTALVRERGSGGRKLRSQKSVMKCPCCPTDIAEFVQNPQVNREVAEVIEKLKK
|
||||
QEEEENAKSLDEGQCSGTSHEEEDDEQPKKRIKLDTDAEVSATVVESDMK
|
||||
>seq018
|
||||
MARDIQLPCDGDGVCMRCKSNPPPEESLTCGTCVTPWHVSCLSSPPKTLASTLQWHCPDC
|
||||
SGEIDPLPVSGGATGFESAGSDLVAAIRAIEADESLSTEEKAKMRQRLLSGKGVEEDDEE
|
||||
EKRKKKGKGKNPNLDVLSALGDNLMCSFCMQLPERPVTKPCGHNACLKCFEKWMGQGKRT
|
||||
CGKCRSIIPEKMAKNPRINSSLVAAIRLAKVSKSAAATTSKVFHFISNQDRPDKAFTTER
|
||||
AKKTGKANAASGKIYVTIPPDHFGPIPAENDPVRNQGLLVGESWEDRLECRQWGAHFPHV
|
||||
AGIAGQSTYGAQSVALSGGYKDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSFDQKFEKSN
|
||||
AALKLSCKLGYPVRVVRSHKEKRSAYAPEEGVRYDGVYRIEKCWRKVGVQVCRYLFVRCD
|
||||
NEPAPWTSDENGDRPRPIPNIPELNMATDLFERKETPSWDFDEGEGCWKWMKPPPASKKS
|
||||
VNVLAPEERKNLRKAIKAAHSNTMRARLLKEFKCQICQQVLTLPVTTPCAHNFCKACLEA
|
||||
KFAGKTLVRERSTGGRTLRSRKNVLNCPCCPTDISDFLQNPQVNREVAEVIEKLKTQEED
|
||||
TAELEDEDEGECSGTTPEEDSEQPKKRIKLDTDATVSATIR
|
||||
>seq019
|
||||
MAIQTQLPCDGDGVCMRCQVTPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||
GVVVPSAAPGTGISGPESSGSVLVAAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGKFF
|
||||
VTIPRDHFGPIPAANDVTRNQGVLVGESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVA
|
||||
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRV
|
||||
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEH
|
||||
GDRPRPLPDVPELENATDLFVRKESPSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKR
|
||||
AKKGNNAMKARLLKEFSCQICRKVLSLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVR
|
||||
KLRAKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEAEVAESSNISEEEEEE
|
||||
SEPPTKKIKMDNNSVGDTSLSA
|
||||
>seq020
|
||||
MAIQTQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||
GVVVPSAAPGTGISGPESSGSVLVTAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGVLV
|
||||
GESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVALSGGYDDDEDHGEWFLYTGSGGRDL
|
||||
SGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRVVRSWKEKRSAYAPAEGVRYDGVYRI
|
||||
EKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEHGDRPRPLPDVPELENATDLFVRKES
|
||||
PSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKRAKKGNNAMKARLLKEFSCQICRKVL
|
||||
SLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVRKLRAKKNIMTCPCCTTDLSEFLQNP
|
||||
QVNREMMEIIENFKKSEEEAEVAESSNISEEEGEEESEPPTKKIKMDKNSVGGTSLSA
|
||||
>seq021
|
||||
MAIETQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVPCLLPESLASSTGEWECPDCS
|
||||
GVVVPSAAPGTGNARPESSGSVLVAAIRAIQADETLTEAEKAKKRQKLMSGGGDDGVDEE
|
||||
EKKKLEIFCSICIQLPERPITTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSVEATAAKVHHIIRNQDRPEKAFTTERAVKTGKANAASGKFF
|
||||
VTIPRDHFGPIPAENDVTRKQGVLVGESWEDRQECRQWGAHFPHIAGIAGQSAVGAQSVA
|
||||
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRINKKQSSDQAFKNMNESLRLSCKMGYPVRV
|
||||
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGSFKVCRYLFVRCDNEPAPWTSDEH
|
||||
GDRPRPLPNVPELETAADLFVRKESPSWDFDEAEGRWKWMKSPPVSRMALDPEERKKNKR
|
||||
AKNTMKARLLKEFSCQICREVLSLPVTTPCAHNFCKACLEAKFAGITQLRERSNGGRKLR
|
||||
AKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEADASISEEEEEESEPPTKK
|
||||
IKMDNNSVGGSGTSLSA
|
||||
>seq022
|
||||
MWIQVRTMDGRQTHTVDSLSRLTKVEELRRKIQELFHVEPGLQRLFYRGKQMEDGHTLFD
|
||||
YEVRLNDTIQLLVRQSLVLPHSTKERDSELSDTDSGCCLGQSESDKSSTHGEAAAETDSR
|
||||
PADEDMWDETELGLYKVNEYVDARDTNMGAWFEAQVVRVTRKAPSRDEPCSSTSRPALEE
|
||||
DVIYHVKYDDYPENGVVQMNSRDVRARARTIIKWQDLEVGQVVMLNYNPDNPKERGFWYD
|
||||
AEISRKRETRTARELYANVVLGDDSLNDCRIIFVDEVFKIERPGEGSPMVDNPMRRKSGP
|
||||
SCKHCKDDVNRLCRVCACHLCGGRQDPDKQLMCDECDMAFHIYCLDPPLSSVPSEDEWYC
|
||||
PECRNDASEVVLAGERLRESKKNAKMASATSSSQRDWGKGMACVGRTKECTIVPSNHYGP
|
||||
IPGIPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGSYSLVLAGGYEDDVDHGNFFTYTG
|
||||
SGGRDLSGNKRTAEQSCDQKLTNTNRALALNCFAPINDQEGAEAKDWRSGKPVRVVRNVK
|
||||
GGKNSKYAPAEGNRYDGIYKVVKYWPEKGKSGFLVWRYLLRRDDDEPGPWTKEGKDRIKK
|
||||
LGLTMQYPEGYLEALANREREKENSKREEEEQQEGGFASPRTGKGKWKRKSAGGGPSRAG
|
||||
SPRRTSKKTKVEPYSLTAQQSSLIREDKSNAKLWNEVLASLKDRPASGSPFQLFLSKVEE
|
||||
TFQCICCQELVFRPITTVCQHNVCKDCLDRSFRAQVFSCPACRYDLGRSYAMQVNQPLQT
|
||||
VLNQLFPGYGNGR
|
||||
>seq023
|
||||
MWIQVRTMDGKETHTVNSLSRLTKVQELRKKIEEVFHVEPQLQRLFYRGKQMEDGHTLFD
|
||||
YDVRLNDTIQLLVRQSLALPLSTKERDSELSDSDSGYGVGHSESDKSSTHGEGAAEADDK
|
||||
TVWEDTDLGLYKVNEYVDVRDNIFGAWFEAQVVQVQKRALSEDEPCSSSAVKTSEDDIMY
|
||||
HVKYDDYPEHGVDIVKAKNVRARARTVIPWENLEVGQVVMANYNVDYPRKRGFWYDVEIC
|
||||
RKRQTRTARELYGNIRLLNDSQLNNCRIMFVDEVLMIELPKERRPLIASPSQPPPALRNT
|
||||
GKSGPSCRFCKDDENKPCRKCACHVCGGREAPEKQLLCDECDMAFHLYCLKPPLTSVPPE
|
||||
PEWYCPSCRTDSSEVVQAGEKLKESKKKAKMASATSSSRRDWGKGMACVGRTTECTIVPA
|
||||
NHFGPIPGVPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGAYSLVLAGGYEDDVDNGNY
|
||||
FTYTGSGGRDLSGNKRTAGQSSDQKLTNNNRALALNCHSPINEKGAEAEDWRQGKPVRVV
|
||||
RNMKGGKHSKYAPAEGNRYDGIYKVVKYWPERGKSGFLVWRYLLRRDDTEPEPWTREGKD
|
||||
RTRQLGLTMQYPEGYLEALANKEKSRKRPAKALEQGPSSSKTGKSKQKSTGPTLSSPRAS
|
||||
KKSKLEPYTLSEQQANLIKEDKGNAKLWDDVLTSLQDGPYQIFLSKVKEAFQCICCQELV
|
||||
FRPVTTVCQHNVCKDCLDRSFRAQVFSCPACRFELDHSSPTRVNQPLQTILNQLFPGYGS
|
||||
GR
|
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
Binary file not shown.
61
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
61
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
|
@ -0,0 +1,61 @@
|
|||
### E. coli paired-end test dataset for Bisulfite-seq applications
|
||||
|
||||
The E. coli data set was generated using [Sherman](https://github.com/FelixKrueger/Sherman) as 10,000 reads of paired-end data, with average methylation levels of 80% in CpG context, and 10% in non-CG context. The files can be found in the folder: general/fastq/dna, and are called:
|
||||
|
||||
```
|
||||
Ecoli_10K_methylated_R1.fastq.gz
|
||||
Ecoli_10K_methylated_R2.fastq.gz
|
||||
```
|
||||
|
||||
```bash
|
||||
Sherman --non_dir --genome /bi/scratch/Genomes/E_coli/ --paired -n 10000 -l 100 --CG 20 --CH 90
|
||||
```
|
||||
|
||||
The data is non-directional, so it should produce roughly 25% mapping to each of the `OT`, `CTOT`, `CTOB` and `OB` strands. Thus, the data can be used for bisulfite mapping in standard (= directional), `--pbat` and `--non_directional` mode.
|
||||
|
||||
A test alignment should look roughly like this:
|
||||
|
||||
`bismark --genome /bi/scratch/Genomes/E_coli/ -1 Ecoli_10K_methylated_R1.fastq.gz -2 Ecoli_10K_methylated_R2.fastq.gz --non_dir`
|
||||
|
||||
```
|
||||
Bismark report for: Ecoli_10K_methylated_R1.fastq.gz and Ecoli_10K_methylated_R2.fastq.gz (version: v0.22.3)
|
||||
Bismark was run with Bowtie 2 against the bisulfite genome of /bi/scratch/Genomes/E_coli/ with the specified options: -q --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --maxins 500
|
||||
Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)
|
||||
|
||||
Final Alignment report
|
||||
======================
|
||||
Sequence pairs analysed in total: 10000
|
||||
Number of paired-end alignments with a unique best hit: 9320
|
||||
Mapping efficiency: 93.2%
|
||||
Sequence pairs with no alignments under any condition: 0
|
||||
Sequence pairs did not map uniquely: 680
|
||||
Sequence pairs which were discarded because genomic sequence could not be extracted: 0
|
||||
|
||||
Number of sequence pairs with unique best (first) alignment came from the bowtie output:
|
||||
CT/GA/CT: 2341 ((converted) top strand)
|
||||
GA/CT/CT: 2329 (complementary to (converted) top strand)
|
||||
GA/CT/GA: 2356 (complementary to (converted) bottom strand)
|
||||
CT/GA/GA: 2294 ((converted) bottom strand)
|
||||
|
||||
Final Cytosine Methylation Report
|
||||
=================================
|
||||
Total number of C's analysed: 471997
|
||||
|
||||
Total methylated C's in CpG context: 111011
|
||||
Total methylated C's in CHG context: 11923
|
||||
Total methylated C's in CHH context: 21433
|
||||
Total methylated C's in Unknown context: 0
|
||||
|
||||
Total unmethylated C's in CpG context: 27790
|
||||
Total unmethylated C's in CHG context: 105877
|
||||
Total unmethylated C's in CHH context: 193963
|
||||
Total unmethylated C's in Unknown context: 0
|
||||
|
||||
C methylated in CpG context: 80.0%
|
||||
C methylated in CHG context: 10.1%
|
||||
C methylated in CHH context: 10.0%
|
||||
Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
|
||||
|
||||
|
||||
Bismark completed in 0d 0h 0m 12s
|
||||
```
|
BIN
tests/data/fastq/rna/SRR4238351_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238351_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238355_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238355_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238359_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238359_subsamp.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/SRR4238379_subsamp.fastq.gz
Normal file
BIN
tests/data/fastq/rna/SRR4238379_subsamp.fastq.gz
Normal file
Binary file not shown.
8
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
8
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
### Paired-end RNA-seq test dataset
|
||||
|
||||
The data here are 2x 76bp RNA-seq data from mouse (10,000 reads of paired-end data). The files can be found in the folder: general/fastq/rna, and are called:
|
||||
|
||||
```
|
||||
test_R1.fastq.gz
|
||||
test_R2.fastq.gz
|
||||
```
|
Binary file not shown.
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue