mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Merge branch 'master' into fastqc-unit-test
This commit is contained in:
commit
3406ed4da0
87 changed files with 74647 additions and 54 deletions
18
.github/workflows/cutadapt.yml
vendored
18
.github/workflows/cutadapt.yml
vendored
|
@ -1,8 +1,15 @@
|
|||
name: cutadapt
|
||||
on:
|
||||
push: {}
|
||||
push:
|
||||
paths:
|
||||
- software/cutadapt/**
|
||||
- .github/workflows/cutadapt.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/cutadapt/*
|
||||
paths:
|
||||
- software/cutadapt/**
|
||||
- .github/workflows/cutadapt.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -12,13 +19,6 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Checkout submodules
|
||||
shell: bash
|
||||
run: |
|
||||
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
|
||||
git submodule sync --recursive
|
||||
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
wget -qO- get.nextflow.io | bash
|
||||
|
|
2
.github/workflows/docker.yml
vendored
2
.github/workflows/docker.yml
vendored
|
@ -14,7 +14,7 @@ jobs:
|
|||
steps:
|
||||
|
||||
# Check out the repo
|
||||
- uses: actions/checkout@v1
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
# Find the tool wrappers that changed
|
||||
# Annoyingly, matrix can't take dynamic variables
|
||||
|
|
15
.github/workflows/fastqc.yml
vendored
15
.github/workflows/fastqc.yml
vendored
|
@ -1,9 +1,15 @@
|
|||
name: FastQC
|
||||
on:
|
||||
push:
|
||||
paths: software/fastqc/**
|
||||
paths:
|
||||
- software/fastqc/**
|
||||
- .github/workflows/fastqc.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/fastqc/**
|
||||
paths:
|
||||
- software/fastqc/**
|
||||
- .github/workflows/fastqc.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -12,12 +18,11 @@ jobs:
|
|||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
||||
# Check out the repository
|
||||
- uses: actions/checkout@v1
|
||||
submodules: true
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
export NXF_VER="20.06.0-edge"
|
||||
wget -qO- get.nextflow.io | bash
|
||||
sudo mv nextflow /usr/local/bin/
|
||||
|
||||
|
|
2
.github/workflows/lint-code.yml
vendored
2
.github/workflows/lint-code.yml
vendored
|
@ -36,7 +36,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v1
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install NodeJS
|
||||
uses: actions/setup-node@v1
|
||||
|
|
15
.github/workflows/samtools_index.yml
vendored
15
.github/workflows/samtools_index.yml
vendored
|
@ -1,8 +1,15 @@
|
|||
name: samtools index
|
||||
on:
|
||||
push: {}
|
||||
push:
|
||||
paths:
|
||||
- software/samtools/index/**
|
||||
- .github/workflows/samtools_index.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/samtools/index*
|
||||
paths:
|
||||
- software/samtools/index/**
|
||||
- .github/workflows/samtools_index.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -11,9 +18,7 @@ jobs:
|
|||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
||||
# Check out the repository
|
||||
- uses: actions/checkout@v1
|
||||
submodules: true
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
|
|
15
.github/workflows/samtools_sort.yml
vendored
15
.github/workflows/samtools_sort.yml
vendored
|
@ -1,8 +1,15 @@
|
|||
name: samtools sort
|
||||
on:
|
||||
push: {}
|
||||
push:
|
||||
paths:
|
||||
- software/samtools/sort**
|
||||
- .github/workflows/samtools_sort.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/samtools/sort*
|
||||
paths:
|
||||
- software/samtools/sort**
|
||||
- .github/workflows/samtools_sort.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -11,9 +18,7 @@ jobs:
|
|||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
||||
# Check out the repository
|
||||
- uses: actions/checkout@v1
|
||||
submodules: true
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
|
|
18
.github/workflows/tcoffee.yml
vendored
18
.github/workflows/tcoffee.yml
vendored
|
@ -1,8 +1,15 @@
|
|||
name: tcoffee
|
||||
on:
|
||||
push: {}
|
||||
push:
|
||||
paths:
|
||||
- software/tcoffee/**
|
||||
- .github/workflows/tcoffee.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/tcoffee/*
|
||||
paths:
|
||||
- software/tcoffee/**
|
||||
- .github/workflows/tcoffee.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -11,14 +18,7 @@ jobs:
|
|||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
||||
# Check out the repository
|
||||
- uses: actions/checkout@v2
|
||||
- name: Checkout submodules
|
||||
shell: bash
|
||||
run: |
|
||||
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
|
||||
git submodule sync --recursive
|
||||
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
|
|
15
.github/workflows/trim_galore.yml
vendored
15
.github/workflows/trim_galore.yml
vendored
|
@ -1,8 +1,15 @@
|
|||
name: Trim Galore!
|
||||
on:
|
||||
push: {}
|
||||
push:
|
||||
paths:
|
||||
- software/trim_galore/**
|
||||
- .github/workflows/trim_galore.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths: software/trim_galore/*
|
||||
paths:
|
||||
- software/trim_galore/**
|
||||
- .github/workflows/trim_galore.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
|
@ -11,9 +18,7 @@ jobs:
|
|||
NXF_ANSI_LOG: false
|
||||
steps:
|
||||
|
||||
# Check out the repository
|
||||
- uses: actions/checkout@v1
|
||||
submodules: true
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
|
|
21
README.md
21
README.md
|
@ -68,23 +68,30 @@ The key words "MUST", "MUST NOT", "SHOULD", etc. are to be interpreted as descri
|
|||
### Defining inputs, outputs and parameters
|
||||
- A module file SHOULD only define inputs and outputs as parameters. Additionally,
|
||||
- it MUST define threads or resources where required for a particular process using `task.cpus`
|
||||
- it MUST be possible to pass additional parameters to the tool as a command line string via the `params.<MODULE>_args` parameter.
|
||||
- ~~it MUST be possible to pass additional parameters to the tool as a command line string via the `params.<MODULE>_args` parameter.~~
|
||||
- it MUST be possible to pass additional parameters as a [nextflow Map](https://www.nextflow.io/docs/latest/script.html#maps) through an additional input channel `val(options)` [Details require discussion].
|
||||
- All NGS modules MUST accept a triplet [name, single_end, reads] as input. The single-end boolean values MUST be specified through the input channel and not inferred from the data e.g. [here](https://github.com/nf-core/tools/blob/028a9b3f9d1ad044e879a1de13d3c3a25a06b9a7/nf_core/pipeline-template/%7B%7Bcookiecutter.name_noslash%7D%7D/modules/nf-core/fastqc.nf#L13).
|
||||
- Process names MUST be all uppercase.
|
||||
- Each process MUST emit a file `<TOOL>.version.txt` containing a single line with the software's version in the format `v<VERSION_NUMBER>`.
|
||||
- All outputs MUST be named using `emit`.
|
||||
- A Process MUST NOT contain a `when` statement.
|
||||
- Optional inputs need development on the nextflow side. In the meantime, "fake files" MAY be used to work around this issue.
|
||||
|
||||
### Atomicity
|
||||
- Software that can be piped together SHOULD be added to separate module files unless there is a run-time or storage advantage in implementing it this way, e.g. `bwa mem | samtools view -C -T ref.fasta` to output CRAM instead of SAM.
|
||||
|
||||
### Resource requirements
|
||||
- Each module MUST define a label `process_low`, `process_medium` or `process_high` to declare resource requirements. (*These flags will be ignored outside of nf-core and the pipeline developer is free to define adequate resource requirements*)
|
||||
|
||||
### Publishing results
|
||||
- The module MUST accept the parameters `params.out_dir` and `params.publish_dir` and MUST publish results into `${params.out_dir}/${params.publish_dir}`.
|
||||
- The `publishDirMode` MUST be configurable via `params.publish_dir_mode`
|
||||
- The module MUST accept a parameter `params.publish_results` accepting at least
|
||||
- `"none"`, to publish no files at all, and
|
||||
- `"default"`, to publish a sensible selection of files.
|
||||
- `"none"`, to publish no files at all,
|
||||
- a glob pattern which is initialized to a sensible default value.
|
||||
|
||||
It MAY accept `"logs"` to publish relevant log files, or other flags, if applicable.
|
||||
|
||||
It MAY accept further options.
|
||||
- To ensure consistent naming, files SHOULD be renamed according to the `$name` variable before returning them.
|
||||
|
||||
### Testing
|
||||
|
@ -93,14 +100,16 @@ The key words "MUST", "MUST NOT", "SHOULD", etc. are to be interpreted as descri
|
|||
|
||||
### Software requirements
|
||||
- Software requirements SHOULD be declared in a conda `environment.yml` file, including exact version numbers. Additionally, there MUST be a `Dockerfile` that containerizes the environment, or packages the software if conda is not available.
|
||||
- Docker containers MUST BE identified by their `sha256(Dockerfile + environment.yml)`.
|
||||
- Each module must have its own `Dockerfile` and `environment.yml` file
|
||||
- Care should be taken to maintain identical files for subcommands that use the same software. Then the hash tag will be the same and they will be implicitly re-used across subcommands.
|
||||
|
||||
### File formats
|
||||
- Wherever possible, [CRAM](https://en.wikipedia.org/wiki/CRAM_(file_format)) files SHOULD be used over BAM files.
|
||||
- Wherever possible, FASTQ files SHOULD be compressed using gzip.
|
||||
|
||||
### Documentation
|
||||
|
||||
Please add some documentation to the top of the module file in the form of native Nextflow comments. This has to be specified in a particular format as you will be able to see from other examples in the [`nf-core/modules/nf`](https://github.com/nf-core/modules/tree/master/nf) directory.
|
||||
- A module MUST be documented in the `meta.yml` file. It MUST document `params`, `input` and `output`. `input` and `output` MUST be a nested list. [Exact details need to be elaborated.]
|
||||
|
||||
### Uploading to `nf-core/modules`
|
||||
|
||||
|
|
BIN
software/bowtie2/test/indices/E_coli/E_coli.1.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.1.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.2.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.2.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.3.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.3.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.4.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.4.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2
Normal file
Binary file not shown.
1
software/bowtie2/test/indices/E_coli/NC_010473.fa
Symbolic link
1
software/bowtie2/test/indices/E_coli/NC_010473.fa
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
1
software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
1
software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
1
software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
1
software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
1
software/bowtie2/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/bowtie2/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/bowtie2/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/bowtie2/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
BIN
software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam
Normal file
BIN
software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam
Normal file
Binary file not shown.
|
@ -0,0 +1,15 @@
|
|||
10000 reads; of these:
|
||||
10000 (100.00%) were paired; of these:
|
||||
893 (8.93%) aligned concordantly 0 times
|
||||
8474 (84.74%) aligned concordantly exactly 1 time
|
||||
633 (6.33%) aligned concordantly >1 times
|
||||
----
|
||||
893 pairs aligned concordantly 0 times; of these:
|
||||
815 (91.27%) aligned discordantly 1 time
|
||||
----
|
||||
78 pairs aligned 0 times concordantly or discordantly; of these:
|
||||
156 mates make up the pairs; of these:
|
||||
0 (0.00%) aligned 0 times
|
||||
1 (0.64%) aligned exactly 1 time
|
||||
155 (99.36%) aligned >1 times
|
||||
100.00% overall alignment rate
|
BIN
software/bowtie2/test/output/test_GRCm38_bowtie2.bam
Normal file
BIN
software/bowtie2/test/output/test_GRCm38_bowtie2.bam
Normal file
Binary file not shown.
15
software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
Normal file
15
software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
Normal file
|
@ -0,0 +1,15 @@
|
|||
9979 reads; of these:
|
||||
9979 (100.00%) were paired; of these:
|
||||
3584 (35.92%) aligned concordantly 0 times
|
||||
3705 (37.13%) aligned concordantly exactly 1 time
|
||||
2690 (26.96%) aligned concordantly >1 times
|
||||
----
|
||||
3584 pairs aligned concordantly 0 times; of these:
|
||||
886 (24.72%) aligned discordantly 1 time
|
||||
----
|
||||
2698 pairs aligned 0 times concordantly or discordantly; of these:
|
||||
5396 mates make up the pairs; of these:
|
||||
2282 (42.29%) aligned 0 times
|
||||
1467 (27.19%) aligned exactly 1 time
|
||||
1647 (30.52%) aligned >1 times
|
||||
88.57% overall alignment rate
|
1
software/fastq_screen/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/fastq_screen/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/fastq_screen/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/fastq_screen/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
31
software/fastq_screen/test/output/test_R1_screen.txt
Normal file
31
software/fastq_screen/test/output/test_R1_screen.txt
Normal file
|
@ -0,0 +1,31 @@
|
|||
#Fastq_screen version: 0.14.0 #Aligner: bowtie2 #Reads in subset: 100000
|
||||
Genome #Reads_processed #Unmapped %Unmapped #One_hit_one_genome %One_hit_one_genome #Multiple_hits_one_genome %Multiple_hits_one_genome #One_hit_multiple_genomes %One_hit_multiple_genomes Multiple_hits_multiple_genomes %Multiple_hits_multiple_genomes
|
||||
Cat 10000 9171 91.71 0 0.00 0 0.00 421 4.21 408 4.08
|
||||
Chicken 10000 8932 89.32 0 0.00 0 0.00 64 0.64 1004 10.04
|
||||
Cow 10000 8484 84.84 0 0.00 0 0.00 294 2.94 1222 12.22
|
||||
Drosophila 10000 9469 94.69 0 0.00 0 0.00 19 0.19 512 5.12
|
||||
Human 10000 8367 83.67 2 0.02 3 0.03 354 3.54 1274 12.74
|
||||
Mouse 10000 122 1.22 3265 32.65 869 8.69 2066 20.66 3678 36.78
|
||||
Pig 10000 8459 84.59 0 0.00 0 0.00 334 3.34 1207 12.07
|
||||
Rat 10000 6432 64.32 1 0.01 3 0.03 1334 13.34 2230 22.30
|
||||
Zebrafish 10000 9125 91.25 0 0.00 0 0.00 41 0.41 834 8.34
|
||||
Arabidopsis 10000 9497 94.97 0 0.00 0 0.00 5 0.05 498 4.98
|
||||
Grape 10000 9600 96.00 0 0.00 1 0.01 82 0.82 317 3.17
|
||||
Potato 10000 9460 94.60 0 0.00 0 0.00 12 0.12 528 5.28
|
||||
Tomato 10000 9521 95.21 0 0.00 0 0.00 45 0.45 434 4.34
|
||||
Adapters 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||
Brachybacterium 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||
Pseudomonas 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||
Massilia_oculi 10000 9999 99.99 0 0.00 1 0.01 0 0.00 0 0.00
|
||||
Ecoli 10000 9998 99.98 1 0.01 1 0.01 0 0.00 0 0.00
|
||||
Lambda 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||
MT 10000 7856 78.56 0 0.00 0 0.00 2034 20.34 110 1.10
|
||||
PhiX 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||
rRNA 10000 9157 91.57 0 0.00 0 0.00 111 1.11 732 7.32
|
||||
Wasp 10000 9473 94.73 0 0.00 0 0.00 211 2.11 316 3.16
|
||||
Vectors 10000 9713 97.13 0 0.00 0 0.00 52 0.52 235 2.35
|
||||
Worm 10000 9645 96.45 0 0.00 0 0.00 13 0.13 342 3.42
|
||||
Yeast 10000 9507 95.07 0 0.00 0 0.00 4 0.04 489 4.89
|
||||
Mycoplasma 10000 9998 99.98 0 0.00 0 0.00 0 0.00 2 0.02
|
||||
|
||||
%Hit_no_genomes: 0.88
|
1
software/fastqc/test/input/test_R1.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/fastqc/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/fastqc/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/fastqc/test/input/test_R2.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
1
software/fastqc/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/fastqc/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
1
software/fastqc/test/input/test_single_end.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_single_end.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_single_end.fastq.gz
|
|
@ -12,24 +12,26 @@ include { FASTQC } from '../main.nf'
|
|||
* Test if FASTQC runs with single-end data
|
||||
*/
|
||||
workflow test_single_end {
|
||||
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
|
||||
.map {f -> [f.baseName, true, f]}
|
||||
input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
|
||||
.map {f -> [f.name.replace(".fastq.gz", ""), true, f]}
|
||||
FASTQC(input_files)
|
||||
|
||||
// test that the output looks as expected
|
||||
FASTQC.out.html.map { name, is_single_end, html_file ->
|
||||
html_hash = checksum.getMD5(new File("${html_file}"));
|
||||
|
||||
assert name == "test_single_end.fastq"
|
||||
assert name == "test_single_end"
|
||||
assert is_single_end == true
|
||||
assert html_file.getName() == "test_single_end.fastq_fastqc.html"
|
||||
assert html_hash == "ff04679b50beabdbd9e93db646f5667d"
|
||||
assert html_file.getName() == "test_single_end_fastqc.html"
|
||||
// Hash seems to vary between local runs and GitHub Actions
|
||||
// TODO: Might be solved when using Docker for tests?
|
||||
// assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
|
||||
}
|
||||
FASTQC.out.zip.map { name, is_single_end, zip_file ->
|
||||
// NOTE: output zip files do not have a consistent hash
|
||||
assert name == "test_single_end.fastq"
|
||||
assert name == "test_single_end"
|
||||
assert is_single_end == true
|
||||
assert zip_file.getName() == "test_single_end.fastq_fastqc.zip"
|
||||
assert zip_file.getName() == "test_single_end_fastqc.zip"
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,7 +39,7 @@ workflow test_single_end {
|
|||
* Test if FASTQC runs with paired end data
|
||||
*/
|
||||
workflow test_paired_end {
|
||||
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
|
||||
input_files = Channel.fromFilePairs("input/test_R{1,2}.fastq.gz")
|
||||
.map {f -> [f[0], false, f[1]]}
|
||||
FASTQC(input_files)
|
||||
|
||||
|
|
187
software/fastqc/test/output/test_R1_fastqc.html
Normal file
187
software/fastqc/test/output/test_R1_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R1_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R1_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R1_val_1_fastqc.html
Normal file
187
software/fastqc/test/output/test_R1_val_1_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R1_val_1_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R1_val_1_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R2_fastqc.html
Normal file
187
software/fastqc/test/output/test_R2_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R2_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R2_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R2_val_2_fastqc.html
Normal file
187
software/fastqc/test/output/test_R2_val_2_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R2_val_2_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R2_val_2_fastqc.zip
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.1.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.1.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.2.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.2.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.3.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.3.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.4.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.4.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.5.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.5.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.6.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.6.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.7.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.7.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.8.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.8.ht2
Normal file
Binary file not shown.
1
software/hisat2/test/indices/E_coli/NC_010473.fa
Symbolic link
1
software/hisat2/test/indices/E_coli/NC_010473.fa
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
1
software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
1
software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
1
software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
1
software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
BIN
software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam
Normal file
BIN
software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam
Normal file
Binary file not shown.
|
@ -0,0 +1,6 @@
|
|||
10000 reads; of these:
|
||||
10000 (100.00%) were paired; of these:
|
||||
823 (8.23%) aligned concordantly 0 times
|
||||
8583 (85.83%) aligned concordantly exactly 1 time
|
||||
594 (5.94%) aligned concordantly >1 times
|
||||
91.77% overall alignment rate
|
|
@ -0,0 +1 @@
|
|||
../../../../bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
|
1
software/multiqc/test/input/fastq_screen/test_R1_screen.txt
Symbolic link
1
software/multiqc/test/input/fastq_screen/test_R1_screen.txt
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../fastq_screen/test/output/test_R1_screen.txt
|
1
software/multiqc/test/input/fastqc/test_R1_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R1_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../fastqc/test/output/test_R1_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../fastqc/test/output/test_R1_val_1_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R2_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R2_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../fastqc/test/output/test_R2_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../fastqc/test/output/test_R2_val_2_fastqc.zip
|
|
@ -0,0 +1 @@
|
|||
../../../../hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt
|
|
@ -0,0 +1 @@
|
|||
../../../../trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt
|
|
@ -0,0 +1 @@
|
|||
../../../../trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt
|
6244
software/multiqc/test/output/multiqc_report.html
Normal file
6244
software/multiqc/test/output/multiqc_report.html
Normal file
File diff suppressed because one or more lines are too long
1
software/trim_galore/test/input/test_R1.fastq.gz
Symbolic link
1
software/trim_galore/test/input/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/trim_galore/test/input/test_R2.fastq.gz
Symbolic link
1
software/trim_galore/test/input/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
|
@ -0,0 +1,97 @@
|
|||
|
||||
SUMMARISING RUN PARAMETERS
|
||||
==========================
|
||||
Input filename: test_R1.fastq.gz
|
||||
Trimming mode: paired-end
|
||||
Trim Galore version: 0.6.5
|
||||
Cutadapt version: 2.3
|
||||
Number of cores used for trimming: 1
|
||||
Quality Phred score cutoff: 20
|
||||
Quality encoding type selected: ASCII+33
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||
Maximum trimming error rate: 0.1 (default)
|
||||
Minimum required adapter overlap (stringency): 1 bp
|
||||
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||
Output file will be GZIP compressed
|
||||
|
||||
|
||||
This is cutadapt 2.3 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz
|
||||
Processing reads on 1 core in single-end mode ...
|
||||
Finished in 0.19 s (19 us/read; 3.12 M reads/minute).
|
||||
|
||||
=== Summary ===
|
||||
|
||||
Total reads processed: 10,000
|
||||
Reads with adapters: 3,225 (32.2%)
|
||||
Reads written (passing filters): 10,000 (100.0%)
|
||||
|
||||
Total basepairs processed: 760,000 bp
|
||||
Quality-trimmed: 4,492 bp (0.6%)
|
||||
Total written (filtered): 748,403 bp (98.5%)
|
||||
|
||||
=== Adapter 1 ===
|
||||
|
||||
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times.
|
||||
|
||||
No. of allowed errors:
|
||||
0-9 bp: 0; 10-12 bp: 1
|
||||
|
||||
Bases preceding removed adapters:
|
||||
A: 23.8%
|
||||
C: 28.2%
|
||||
G: 22.7%
|
||||
T: 25.3%
|
||||
none/other: 0.0%
|
||||
|
||||
Overview of removed sequences
|
||||
length count expect max.err error counts
|
||||
1 2170 2500.0 0 2170
|
||||
2 622 625.0 0 622
|
||||
3 223 156.2 0 223
|
||||
4 64 39.1 0 64
|
||||
5 14 9.8 0 14
|
||||
6 9 2.4 0 9
|
||||
7 8 0.6 0 8
|
||||
8 5 0.2 0 5
|
||||
9 4 0.0 0 4
|
||||
10 8 0.0 1 7 1
|
||||
11 3 0.0 1 3
|
||||
12 4 0.0 1 4
|
||||
13 6 0.0 1 6
|
||||
14 5 0.0 1 4 1
|
||||
15 5 0.0 1 5
|
||||
16 6 0.0 1 5 1
|
||||
17 3 0.0 1 3
|
||||
18 3 0.0 1 3
|
||||
19 1 0.0 1 1
|
||||
20 3 0.0 1 3
|
||||
21 7 0.0 1 7
|
||||
22 7 0.0 1 7
|
||||
23 3 0.0 1 3
|
||||
24 6 0.0 1 6
|
||||
25 4 0.0 1 4
|
||||
26 2 0.0 1 2
|
||||
27 4 0.0 1 4
|
||||
28 1 0.0 1 1
|
||||
29 3 0.0 1 3
|
||||
30 4 0.0 1 4
|
||||
32 3 0.0 1 3
|
||||
33 2 0.0 1 1 1
|
||||
34 1 0.0 1 1
|
||||
35 1 0.0 1 1
|
||||
40 1 0.0 1 1
|
||||
42 1 0.0 1 0 1
|
||||
45 1 0.0 1 0 1
|
||||
49 1 0.0 1 0 1
|
||||
52 1 0.0 1 0 1
|
||||
56 2 0.0 1 0 2
|
||||
59 1 0.0 1 0 1
|
||||
67 1 0.0 1 0 1
|
||||
70 2 0.0 1 0 2
|
||||
|
||||
RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz
|
||||
=============================================
|
||||
10000 sequences processed in total
|
||||
|
1
software/trim_galore/test/output/test_R1_val_1.fq.gz
Symbolic link
1
software/trim_galore/test/output/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -0,0 +1,100 @@
|
|||
|
||||
SUMMARISING RUN PARAMETERS
|
||||
==========================
|
||||
Input filename: test_R2.fastq.gz
|
||||
Trimming mode: paired-end
|
||||
Trim Galore version: 0.6.5
|
||||
Cutadapt version: 2.3
|
||||
Number of cores used for trimming: 1
|
||||
Quality Phred score cutoff: 20
|
||||
Quality encoding type selected: ASCII+33
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||
Maximum trimming error rate: 0.1 (default)
|
||||
Minimum required adapter overlap (stringency): 1 bp
|
||||
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||
Output file will be GZIP compressed
|
||||
|
||||
|
||||
This is cutadapt 2.3 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz
|
||||
Processing reads on 1 core in single-end mode ...
|
||||
Finished in 0.22 s (22 us/read; 2.71 M reads/minute).
|
||||
|
||||
=== Summary ===
|
||||
|
||||
Total reads processed: 10,000
|
||||
Reads with adapters: 3,295 (33.0%)
|
||||
Reads written (passing filters): 10,000 (100.0%)
|
||||
|
||||
Total basepairs processed: 760,000 bp
|
||||
Quality-trimmed: 7,096 bp (0.9%)
|
||||
Total written (filtered): 745,649 bp (98.1%)
|
||||
|
||||
=== Adapter 1 ===
|
||||
|
||||
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times.
|
||||
|
||||
No. of allowed errors:
|
||||
0-9 bp: 0; 10-12 bp: 1
|
||||
|
||||
Bases preceding removed adapters:
|
||||
A: 22.6%
|
||||
C: 28.2%
|
||||
G: 23.6%
|
||||
T: 25.6%
|
||||
none/other: 0.0%
|
||||
|
||||
Overview of removed sequences
|
||||
length count expect max.err error counts
|
||||
1 2213 2500.0 0 2213
|
||||
2 647 625.0 0 647
|
||||
3 239 156.2 0 239
|
||||
4 53 39.1 0 53
|
||||
5 10 9.8 0 10
|
||||
6 7 2.4 0 7
|
||||
7 8 0.6 0 8
|
||||
8 5 0.2 0 5
|
||||
9 5 0.0 0 5
|
||||
10 10 0.0 1 8 2
|
||||
11 2 0.0 1 2
|
||||
12 4 0.0 1 4
|
||||
13 7 0.0 1 7
|
||||
14 3 0.0 1 3
|
||||
15 4 0.0 1 4
|
||||
16 5 0.0 1 5
|
||||
17 3 0.0 1 3
|
||||
18 5 0.0 1 4 1
|
||||
19 2 0.0 1 1 1
|
||||
20 3 0.0 1 3
|
||||
21 7 0.0 1 7
|
||||
22 6 0.0 1 6
|
||||
23 3 0.0 1 3
|
||||
24 7 0.0 1 7
|
||||
25 4 0.0 1 4
|
||||
26 2 0.0 1 2
|
||||
27 4 0.0 1 4
|
||||
28 1 0.0 1 1
|
||||
29 3 0.0 1 3
|
||||
30 4 0.0 1 4
|
||||
32 3 0.0 1 3
|
||||
33 1 0.0 1 1
|
||||
34 1 0.0 1 1
|
||||
35 2 0.0 1 1 1
|
||||
40 1 0.0 1 0 1
|
||||
41 1 0.0 1 1
|
||||
46 1 0.0 1 0 1
|
||||
48 1 0.0 1 0 1
|
||||
49 2 0.0 1 0 2
|
||||
56 2 0.0 1 0 2
|
||||
59 1 0.0 1 0 1
|
||||
70 1 0.0 1 0 1
|
||||
73 2 0.0 1 0 2
|
||||
|
||||
RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz
|
||||
=============================================
|
||||
10000 sequences processed in total
|
||||
|
||||
Total number of sequences analysed for the sequence pair length validation: 10000
|
||||
|
||||
Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%)
|
1
software/trim_galore/test/output/test_R2_val_2.fq.gz
Symbolic link
1
software/trim_galore/test/output/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
File diff suppressed because it is too large
Load diff
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
|
@ -0,0 +1,265 @@
|
|||
>seq001
|
||||
MAGKRKRANAPDQTERRSSVRVQKVRQKALDEKARLVQERVKLLSDRKSEICVDDTELHE
|
||||
KEEENVDGSPKRRSPPKLTAMQKGKQKLSVSLNGKDVNLEPHLKVTKCLRLFNKQYLLCV
|
||||
QAKLSRPDLKGVTEMIKAKAILYPRKIIGDLPGIDVGHRFFSRAEMCAVGFHNHWLNGID
|
||||
YMSMEYEKEYSNYKLPLAVSIVMSGQYEDDLDNADTVTYTGQGGHNLTGNKRQIKDQLLE
|
||||
RGNLALKHCCEYNVPVRVTRGHNCKSSYTKRVYTYDGLYKVEKFWAQKGVSGFTVYKYRL
|
||||
KRLEGQPELTTDQVNFVAGRIPTSTSEIEGLVCEDISGGLEFKGIPATNRVDDSPVSPTS
|
||||
GFTYIKSLIIEPNVIIPKSSTGCNCRGSCTDSKKCACAKLNGGNFPYVDLNDGRLIESRD
|
||||
VVFECGPHCGCGPKCVNRTSQKRLRFNLEVFRSAKKGWAVRSWEYIPAGSPVCEYIGVVR
|
||||
RTADVDTISDNEYIFEIDCQQTMQGLGGRQRRLRDVAVPMNNGVSQSSEDENAPEFCIDA
|
||||
GSTGNFARFINHSCEPNLFVQCVLSSHQDIRLARVVLFAADNISPMQELTYDYGYALDSV
|
||||
HGPDGKVKQLACYCGALNCRKRLY
|
||||
>seq002
|
||||
MPRHFGAVPGVVPGMAFVNRQELRDAGVHLPTQAGISGSASEGADSIVLSGGYEDDRDEG
|
||||
DVILYTGEGGRDPLTGHQVKPQQLVRGNLALAISHRDGLPLRVTRGHRHSSQFSPQSGYQ
|
||||
YAGLYRVDDHWREVGRSGFLIWRFRLTRLENQDAHHAGADPQHPDSQHPERRPTLVQRIV
|
||||
RDTATARAVKALYDHRCQVCGERLETPAGAYAEAAHIRPLGAPHHGPDVAGNILCLCPNH
|
||||
HVLFDFGAFSVGDDLRLLGLPGRLHVHPQHAVDREHLAYHRRHYALQAGLEWGCSVPLT
|
||||
>seq003
|
||||
MGVMENLMVHTEISKVKSQSNGEVEKRGVSVLENGGVCKLDRMSGLKFKRRKVFAVRDFP
|
||||
PGCGSRAMEVKIACENGNVVEDVKVVESLVKEEESLGQRDASENVSDIRMAEPVEVQPLR
|
||||
ICLPGGDVVRDLSVTAGDECSNSEQIVAGSGVSSSSGTENIVRDIVVYADESSLGMDNLD
|
||||
QTQPLEIEMSDVAVAKPRLVAGRKKAKKGIACHSSLKVVSREFGEGSRKKKSKKNLYWRD
|
||||
RESLDSPEQLRILGVGTSSGSSSGDSSRNKVKETLRLFHGVCRKILQEDEAKPEDQRRKG
|
||||
KGLRIDFEASTILKRNGKFLNSGVHILGEVPGVEVGDEFQYRMELNILGIHKPSQAGIDY
|
||||
MKYGKAKVATSIVASGGYDDHLDNSDVLTYTGQGGNVMQVKKKGEELKEPEDQKLITGNL
|
||||
ALATSIEKQTPVRVIRGKHKSTHDKSKGGNYVYDGLYLVEKYWQQVGSHGMNVFKFQLRR
|
||||
IPGQPELSWVEVKKSKSKYREGLCKLDISEGKEQSPISAVNEIDDEKPPLFTYTVKLIYP
|
||||
DWCRPVPPKSCCCTTRCTEAEARVCACVEKNGGEIPYNFDGAIVGAKPTIYECGPLCKCP
|
||||
SSCYLRVTQHGIKLPLEIFKTKSRGWGVRCLKSIPIGSFICEYVGELLEDSEAERRIGND
|
||||
EYLFDIGNRYDNSLAQGMSELMLGTQAGRSMAEGDESSGFTIDAASKGNVGRFINHSCSP
|
||||
NLYAQNVLYDHEDSRIPHVMFFAQDNIPPLQELCYDYNYALDQVRDSKGNIKQKPCFCGA
|
||||
AVCRRRLY
|
||||
>seq004
|
||||
MSTLLPFPDLNLMPDSQSSTAGTTAGDTVVTGKLEVKSEPIEEWQTPPSSTSDQSANTDL
|
||||
IAEFIRISELFRSAFKPLQVKGLDGVSVYGLDSGAIVAVPEKENRELIEPPPGFKDNRVS
|
||||
TVVVSPKFERPRELARIAILGHEQRKELRQVMKRTRMTYESLRIHLMAESMKNHVLGQGR
|
||||
RRRSDMAAAYIMRDRGLWLNYDKHIVGPVTGVEVGDIFFYRMELCVLGLHGQTQAGIDCL
|
||||
TAERSATGEPIATSIVVSGGYEDDEDTGDVLVYTGHGGQDHQHKQCDNQRLVGGNLGMER
|
||||
SMHYGIEVRVIRGIKYENSISSKVYVYDGLYKIVDWWFAVGKSGFGVFKFRLVRIEGQPM
|
||||
MGSAVMRFAQTLRNKPSMVRPTGYVSFDLSNKKENVPVFLYNDVDGDQEPRHYEYIAKAV
|
||||
FPPGIFGQGGISRTGCECKLSCTDDCLCARKNGGEFAYDDNGHLLKGKHVVFECGEFCTC
|
||||
GPSCKSRVTQKGLRNRLEVFRSKETGWGVRTLDLIEAGAFICEYAGVVVTRLQAEILSMN
|
||||
GDVMVYPGRFTDQWRNWGDLSQVYPDFVRPNYPSLPPLDFSMDVSRMRNVACYISHSKEP
|
||||
NVMVQFVLHDHNHLMFPRVMLFALENISPLAELSLDYGLADEVNGKLAICN
|
||||
>seq005
|
||||
MVHSESSILSSLRGGDGGGIPCSKDELAINGSYTDPMGRRKSKRFKVAAESEFSPDFGSI
|
||||
TRQLRSRRMQKEFTVETYETRNVSDVCVLSSQADVELIPGEIVAERDSFKSVDCNDMSVG
|
||||
LTEGAESLGVNMQEPMKDRNMPENTSEQNMVEVHPPSISLPEEDMMGSVCRKSITGTKEL
|
||||
HGRTISVGRDLSPNMGSKFSKNGKTAKRSISVEEENLVLEKSDSGDHLGPSPEVLELEKS
|
||||
EVWIITDKGVVMPSPVKPSEKRNGDYGEGSMRKNSERVALDKKRLASKFRLSNGGLPSCS
|
||||
SSGDSARYKVKETMRLFHETCKKIMQEEEARPRKRDGGNFKVVCEASKILKSKGKNLYSG
|
||||
TQIIGTVPGVEVGDEFQYRMELNLLGIHRPSQSGIDYMKDDGGELVATSIVSSGGYNDVL
|
||||
DNSDVLIYTGQGGNVGKKKNNEPPKDQQLVTGNLALKNSINKKNPVRVIRGIKNTTLQSS
|
||||
VVAKNYVYDGLYLVEEYWEETGSHGKLVFKFKLRRIPGQPELPWKEVAKSKKSEFRDGLC
|
||||
NVDITEGKETLPICAVNNLDDEKPPPFIYTAKMIYPDWCRPIPPKSCGCTNGCSKSKNCA
|
||||
CIVKNGGKIPYYDGAIVEIKPLVYECGPHCKCPPSCNMRVSQHGIKIKLEIFKTESRGWG
|
||||
VRSLESIPIGSFICEYAGELLEDKQAESLTGKDEYLFDLGDEDDPFTINAAQKGNIGRFI
|
||||
NHSCSPNLYAQDVLYDHEEIRIPHIMFFALDNIPPLQELSYDYNYKIDQVYDSNGNIKKK
|
||||
FCYCGSAECSGRLY
|
||||
>seq006
|
||||
MERNGGHYTDKTRVLDIKPLRTLRPVFPSGNQAPPFVCAPPFGPFPPGFSSFYPFSSSQA
|
||||
NQHTPDLNQAQYPPQHQQPQNPPPVYQQQPPQHASEPSLVTPLRSFRSPDVSNGNAELEG
|
||||
STVKRRIPKKRPISRPENMNFESGINVADRENGNRELVLSVLMRFDALRRRFAQLEDAKE
|
||||
AVSGIIKRPDLKSGSTCMGRGVRTNTKKRPGIVPGVEIGDVFFFRFEMCLVGLHSPSMAG
|
||||
IDYLVVKGETEEEPIATSIVSSGYYDNDEGNPDVLIYTGQGGNADKDKQSSDQKLERGNL
|
||||
ALEKSLRRDSAVRVIRGLKEASHNAKIYIYDGLYEIKESWVEKGKSGHNTFKYKLVRAPG
|
||||
QPPAFASWTAIQKWKTGVPSRQGLILPDMTSGVESIPVSLVNEVDTDNGPAYFTYSTTVK
|
||||
YSESFKLMQPSFGCDCANLCKPGNLDCHCIRKNGGDFPYTGNGILVSRKPMIYECSPSCP
|
||||
CSTCKNKVTQMGVKVRLEVFKTANRGWGLRSWDAIRAGSFICIYVGEAKDKSKVQQTMAN
|
||||
DDYTFDTTNVYNPFKWNYEPGLADEDACEEMSEESEIPLPLIISAKNVGNVARFMNHSCS
|
||||
PNVFWQPVSYENNSQLFVHVAFFAISHIPPMTELTYDYGVSRPSGTQNGNPLYGKRKCFC
|
||||
GSAYCRGSFG
|
||||
>seq007
|
||||
MQGVPGFNTVPNPNHYDKSIVLDIKPLRSLKPVFPNGNQGPPFVGCPPFGPSSSEYSSFF
|
||||
PFGAQQPTHDTPDLNQTQNTPIPSFVPPLRSYRTPTKTNGPSSSSGTKRGVGRPKGTTSV
|
||||
KKKEKKTVANEPNLDVQVVKKFSSDFDSGISAAEREDGNAYLVSSVLMRFDAVRRRLSQV
|
||||
EFTKSATSKAAGTLMSNGVRTNMKKRVGTVPGIEVGDIFFSRIEMCLVGLHMQTMAGIDY
|
||||
IISKAGSDEESLATSIVSSGRYEGEAQDPESLIYSGQGGNADKNRQASDQKLERGNLALE
|
||||
NSLRKGNGVRVVRGEEDAASKTGKIYIYDGLYSISESWVEKGKSGCNTFKYKLVRQPGQP
|
||||
PAFGFWKSVQKWKEGLTTRPGLILPDLTSGAESKPVSLVNDVDEDKGPAYFTYTSSLKYS
|
||||
ETFKLTQPVIGCSCSGSCSPGNHNCSCIRKNDGDLPYLNGVILVSRRPVIYECGPTCPCH
|
||||
ASCKNRVIQTGLKSRLEVFKTRNRGWGLRSWDSLRAGSFICEYAGEVKDNGNLRGNQEED
|
||||
AYVFDTSRVFNSFKWNYEPELVDEDPSTEVPEEFNLPSPLLISAKKFGNVARFMNHSCSP
|
||||
NVFWQPVIREGNGESVIHIAFFAMRHIPPMAELTYDYGISPTSEARDESLLHGQRTCLCG
|
||||
SEQCRGSFG
|
||||
>seq008
|
||||
MGSSHIPLDPSLNPSPSLIPKLEPVTESTQNLAFQLPNTNPQALISSAVSDFNEATDFSS
|
||||
DYNTVAESARSAFAQRLQRHDDVAVLDSLTGAIVPVEENPEPEPNPYSTSDSSPSVATQR
|
||||
PRPQPRSSELVRITDVGPESERQFREHVRKTRMIYDSLRMFLMMEEAKRNGVGGRRARAD
|
||||
GKAGKAGSMMRDCMLWMNRDKRIVGSIPGVQVGDIFFFRFELCVMGLHGHPQSGIDFLTG
|
||||
SLSSNGEPIATSVIVSGGYEDDDDQGDVIMYTGQGGQDRLGRQAEHQRLEGGNLAMERSM
|
||||
YYGIEVRVIRGLKYENEVSSRVYVYDGLFRIVDSWFDVGKSGFGVFKYRLERIEGQAEMG
|
||||
SSVLKFARTLKTNPLSVRPRGYINFDISNGKENVPVYLFNDIDSDQEPLYYEYLAQTSFP
|
||||
PGLFVQQSGNASGCDCVNGCGSGCLCEAKNSGEIAYDYNGTLIRQKPLIHECGSACQCPP
|
||||
SCRNRVTQKGLRNRLEVFRSLETGWGVRSLDVLHAGAFICEYAGVALTREQANILTMNGD
|
||||
TLVYPARFSSARWEDWGDLSQVLADFERPSYPDIPPVDFAMDVSKMRNVACYISHSTDPN
|
||||
VIVQFVLHDHNSLMFPRVMLFAAENIPPMTELSLDYGVVDDWNAKLAICN
|
||||
>seq009
|
||||
MDKSIPIKAIPVACVRPDLVDDVTKNTSTIPTMVSPVLTNMPSATSPLLMVPPLRTIWPS
|
||||
NKEWYDGDAGPSSTGPIKREASDNTNDTAHNTFAPPPEMVIPLITIRPSDDSSNYSCDAG
|
||||
AGPSTGPVKRGRGRPKGSKNSTPTEPKKPKVYDPNSLKVTSRGNFDSEITEAETETGNQE
|
||||
IVDSVMMRFDAVRRRLCQINHPEDILTTASGNCTKMGVKTNTRRRIGAVPGIHVGDIFYY
|
||||
WGEMCLVGLHKSNYGGIDFFTAAESAVEGHAAMCVVTAGQYDGETEGLDTLIYSGQGGTD
|
||||
VYGNARDQEMKGGNLALEASVSKGNDVRVVRGVIHPHENNQKIYIYDGMYLVSKFWTVTG
|
||||
KSGFKEFRFKLVRKPNQPPAYAIWKTVENLRNHDLIDSRQGFILEDLSFGAELLRVPLVN
|
||||
EVDEDDKTIPEDFDYIPSQCHSGMMTHEFHFDRQSLGCQNCRHQPCMHQNCTCVQRNGDL
|
||||
LPYHNNILVCRKPLIYECGGSCPCPDHCPTRLVQTGLKLHLEVFKTRNCGWGLRSWDPIR
|
||||
AGTFICEFAGLRKTKEEVEEDDDYLFDTSKIYQRFRWNYEPELLLEDSWEQVSEFINLPT
|
||||
QVLISAKEKGNVGRFMNHSCSPNVFWQPIEYENRGDVYLLIGLFAMKHIPPMTELTYDYG
|
||||
VSCVERSEEDEGFLVCPYLSSSLWPSSSEIHFLINSKGRAWYDKIYRKLASQGNVSSGLD
|
||||
SVKDEPEKLREEQMEGDGFKEKLSDSVLIDEKLEEYSDCDRTATTSRSHTDPVSSQSTHQ
|
||||
TPESFRTPITCDDDTFVSVSGISRDVSNLIPFATETPASPVQEKMANTRSFSNNSVKGNQ
|
||||
DEFFIEDFDVGPMDTIDLYDMTFREDPSDFDDNLLYAMRDRTKQLRSFKRKIMDAIKSKR
|
||||
RREKEYEQLAIWFGDADMGCDLVNDKEQSTTSIDSKSSQTNVPVVSEDSEWEIL
|
||||
>seq010
|
||||
MMMTQRISPSNKRRRVSFVRDFPQFSVKDESDIGGDDVATIKENLDGKEDSNCVGVAYRD
|
||||
HHRPKEESFDSIMKKAGFNVANGNLGNGKFPPSKRNVPLPCEGKVQPLSVEEGIKLMAYE
|
||||
SQRRRCFGKPLVSTKVVQKHRYSPAKKKLSNATALRVRHSPMKKLSNASRLRANAHRPTQ
|
||||
HKDERRSGVLSVIQRNRLSKDLTPRQKVQEVLRIFTLVFDELDRNKAARRGGSETAKSRI
|
||||
DYQTWTILREMGMQVNSQKRIGSVPGIKVGDKIQFKAALSVIGLHFGIMSGIDYMYKGNK
|
||||
EVATSIVSSEGNDYGDRFINDVMIYCGQGGNMRSKDHKAIKDQKLVGGNLALANSIKEKT
|
||||
PVRVIRGERRLDNRGKDYVYDGLYRVEKYWEERGPQGNILFKFKLRRTCQPYVDF
|
||||
>seq011
|
||||
MSQKRSLVFAIRDFPPGCGTHIDVSSSLNHPAEKAFKHPRTGDVSGENLSFAEAKPEGTC
|
||||
LKRESADQDHIFAAPEHNAKREPAGQDHVVAATTVAYATSSHRQKVEIGNSDCDPTPREK
|
||||
VLEVLSLFKQVYNQLDRDKKARRGGDFLDATSRIDLKTLTVLEKMGKQVNTEKRIGSVPG
|
||||
INIGDVFQYKTELRVVGLHSKPMCGIDYIKLGDDRITTSIVASEGYGYNDTYNSGVMVYT
|
||||
GEGGNVINKQKKTEDQKLVKGNLALATSMRQKSQVRVIRGEERLDRKGKRYVYDGLYMVE
|
||||
EYWVERDVRGKSVYKFKLCRIPGQLPLT
|
||||
>seq012
|
||||
MCLVGLHRNTAGGIDSLLAKESGVDGPAATSVVTSGKYDNETEDLETLIYSGHGGKPCDQ
|
||||
VLQRGNRALEASVRRRNEVRVIRGELYNNEKVYIYDGLYLVSDCWQVTGKSGFKEYRFKL
|
||||
LRKPGQPPGYAIWKLVENLRNHELIDPRQGFILGDLSFGEEGLRVPLVNEVDEEDKTIPD
|
||||
DFDYIRSQCYSGMTNDVNVDSQSLVQSYIHQNCTCILKNCGQLPYHDNILVCRKPLIYEC
|
||||
GGSCPTRMVETGLKLHLEVFKTSNCGWGLRSWDPIRAGTFICEFTGVSKTKEEVEEDDDY
|
||||
LFDTSRIYHSFRWNYEPELLCEDACEQVSEDANLPTQVLISAKEKGNVGRFMNHNCWPNV
|
||||
FWQPIEYDDNNGHIYVRIGLFAMKHIPPMTELTYDYGISCVEKTGEDEVIYKGKKICLCG
|
||||
SVKCRGSFG
|
||||
>seq013
|
||||
MGLVGLHSGTIDMEFIGVEDHGDEEGKQIAVSVISSGKNADKTEDPDSLIFTGFGGTDMY
|
||||
HGQPCNQKLERLNIPLEAAFRKKSIVRVVRCMKDEKRTNGNIYIYDGTYMITNRWEEEGQ
|
||||
NGFIVFKFKLVREPDQKPAFGIWKSIQNWRNGLSIRPGLILEDLSNGAENLKVCLVNEVD
|
||||
KENGPALFRYVTSLIHEVINNIPSMVDRCACGRRSCGSKHVFREKLSVSSSLVISAKKSG
|
||||
NVARFMNHSCSPNVFWQSIAREQNGLWCLYIGFFAMKHIPPLTELRYDYGKSRGGGKKMC
|
||||
LCRTKKCCGSFG
|
||||
>seq014
|
||||
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRSRGRH
|
||||
FANEDQEFEDLNEALRVSCEMGYPVRVVRSYKDRYSAYAPKEGVRYDGVYRIEKCWRKAR
|
||||
FPVCRYLFVRCDNEPAPWNSDESGDRPRPLPNIPELETASDLFERKESPSWDFDEAEGRW
|
||||
RWMKPPPANHEQRERMKMAMTCLLLFVLIILVGSSSILYQY
|
||||
>seq015
|
||||
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRRSYKD
|
||||
RYSAYAPKEGVRYDGVYRIEKCWRKARFPDSFKVCRYLFVRCDNEPAPWNSDESGDRPRP
|
||||
LPNIPELETASDLFERKESPSWDFDEAEGRWRWMKPPPANHEQRERMKMAMTCLLLFVLI
|
||||
ILVGSSSILYQY
|
||||
>seq016
|
||||
MAEQPRINSALVSVIRMAKVSKNANSAVSAAAYHYIRNDDRPDKAFTTERAKRAGKANAS
|
||||
SGQIFVTIPPDHFGPILAENDPKRSIGVLVGDTWEDRLECRQWGAHFPHVAGIAGQSTHG
|
||||
AQSVALSGGYVDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSSDQKFEKLNAALRISCLKG
|
||||
YPVRVVRSHKEKRSSYAPEAGVRYDGVYRIEKCWRKISVQGKFKVCRYLFVRCDNEPAPW
|
||||
TSDIYGDRPRPLPKVDELKGATDISERKGTPSWDFDEKEGWKWVKPPPISRKPNLSGDPA
|
||||
TDKEIRRVARRAQMSVTERLLKEFGCSICKQVMKEPLTTPCAHNFCKLCLVGTYGSQSSM
|
||||
RERSRGGRTLRAQKIVKKCPSCPTDICDFLENPQINREMMDLIESLQRKAVEEGDTKTSS
|
||||
DVSNGAESSGDDGNNEALEKGEDDSSLKDDGSLKDDGKVVKAVVVIKEEDLQPKKSKGED
|
||||
EKEQGDKKMDSADVVDIAVEKKQATKRASEKAEKKQARKRKGDAVATNDGKRMKTGGDAM
|
||||
ETAAEEDAPLSGGTPVKRNSRKSSEVDAKGGGGSPVVSSPRRVTRSNAKASGEADGSPAT
|
||||
RTRRATRAEA
|
||||
>seq017
|
||||
MTPATQYPCDPEGVCMRCKSMPPPEESLTCGTCVTPWHVSCLLSPPETLSATLQWLCPDC
|
||||
SGETNPLPVSGVAAGYGSVGSDLVAAIHSIEADETLSAEEKAKKKQQLLSGKGVVDEDDE
|
||||
EEKKKTSKGKKPIDVLSHFECSFCMQSLQKPVSVRVLFALALMLVWFLESTPCGHNACLK
|
||||
CFLKWMGQGHRSCGTCRSVIPESMVTNPRINLSIVSAIRLARVSEKADARTSKVVHYVDN
|
||||
EDRPDKAFTTERAKKTGNANASSGKIFVTIPRDHFGPIPAENDPVRNQGLLVGESWKGRL
|
||||
ACRQWGAHFPHVSGIAGQASYGAQSVVLAGGYDDDEDHGEWFLYTGRTNTVQAFDQVFLN
|
||||
FNEALRLSCKLGYPVRVVRSTKDKRSPYAPQGGLLRYDGVYRIEKCWRIVGIQMCRFLFV
|
||||
RCDNEPAPWTSDEHGDRPRPLPNVPELNMATDLFERKESPSWDFDEGEDRWRWMKPPPAS
|
||||
KKAVKNVLDPEERKLLREAIKSANPNTMRARLLKEFKCQICQKVMTNPVTTPCAHNFCKA
|
||||
CLESKFAGTALVRERGSGGRKLRSQKSVMKCPCCPTDIAEFVQNPQVNREVAEVIEKLKK
|
||||
QEEEENAKSLDEGQCSGTSHEEEDDEQPKKRIKLDTDAEVSATVVESDMK
|
||||
>seq018
|
||||
MARDIQLPCDGDGVCMRCKSNPPPEESLTCGTCVTPWHVSCLSSPPKTLASTLQWHCPDC
|
||||
SGEIDPLPVSGGATGFESAGSDLVAAIRAIEADESLSTEEKAKMRQRLLSGKGVEEDDEE
|
||||
EKRKKKGKGKNPNLDVLSALGDNLMCSFCMQLPERPVTKPCGHNACLKCFEKWMGQGKRT
|
||||
CGKCRSIIPEKMAKNPRINSSLVAAIRLAKVSKSAAATTSKVFHFISNQDRPDKAFTTER
|
||||
AKKTGKANAASGKIYVTIPPDHFGPIPAENDPVRNQGLLVGESWEDRLECRQWGAHFPHV
|
||||
AGIAGQSTYGAQSVALSGGYKDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSFDQKFEKSN
|
||||
AALKLSCKLGYPVRVVRSHKEKRSAYAPEEGVRYDGVYRIEKCWRKVGVQVCRYLFVRCD
|
||||
NEPAPWTSDENGDRPRPIPNIPELNMATDLFERKETPSWDFDEGEGCWKWMKPPPASKKS
|
||||
VNVLAPEERKNLRKAIKAAHSNTMRARLLKEFKCQICQQVLTLPVTTPCAHNFCKACLEA
|
||||
KFAGKTLVRERSTGGRTLRSRKNVLNCPCCPTDISDFLQNPQVNREVAEVIEKLKTQEED
|
||||
TAELEDEDEGECSGTTPEEDSEQPKKRIKLDTDATVSATIR
|
||||
>seq019
|
||||
MAIQTQLPCDGDGVCMRCQVTPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||
GVVVPSAAPGTGISGPESSGSVLVAAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGKFF
|
||||
VTIPRDHFGPIPAANDVTRNQGVLVGESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVA
|
||||
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRV
|
||||
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEH
|
||||
GDRPRPLPDVPELENATDLFVRKESPSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKR
|
||||
AKKGNNAMKARLLKEFSCQICRKVLSLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVR
|
||||
KLRAKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEAEVAESSNISEEEEEE
|
||||
SEPPTKKIKMDNNSVGDTSLSA
|
||||
>seq020
|
||||
MAIQTQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||
GVVVPSAAPGTGISGPESSGSVLVTAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGVLV
|
||||
GESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVALSGGYDDDEDHGEWFLYTGSGGRDL
|
||||
SGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRVVRSWKEKRSAYAPAEGVRYDGVYRI
|
||||
EKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEHGDRPRPLPDVPELENATDLFVRKES
|
||||
PSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKRAKKGNNAMKARLLKEFSCQICRKVL
|
||||
SLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVRKLRAKKNIMTCPCCTTDLSEFLQNP
|
||||
QVNREMMEIIENFKKSEEEAEVAESSNISEEEGEEESEPPTKKIKMDKNSVGGTSLSA
|
||||
>seq021
|
||||
MAIETQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVPCLLPESLASSTGEWECPDCS
|
||||
GVVVPSAAPGTGNARPESSGSVLVAAIRAIQADETLTEAEKAKKRQKLMSGGGDDGVDEE
|
||||
EKKKLEIFCSICIQLPERPITTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||
RINLALVSAIRLANVTKCSVEATAAKVHHIIRNQDRPEKAFTTERAVKTGKANAASGKFF
|
||||
VTIPRDHFGPIPAENDVTRKQGVLVGESWEDRQECRQWGAHFPHIAGIAGQSAVGAQSVA
|
||||
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRINKKQSSDQAFKNMNESLRLSCKMGYPVRV
|
||||
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGSFKVCRYLFVRCDNEPAPWTSDEH
|
||||
GDRPRPLPNVPELETAADLFVRKESPSWDFDEAEGRWKWMKSPPVSRMALDPEERKKNKR
|
||||
AKNTMKARLLKEFSCQICREVLSLPVTTPCAHNFCKACLEAKFAGITQLRERSNGGRKLR
|
||||
AKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEADASISEEEEEESEPPTKK
|
||||
IKMDNNSVGGSGTSLSA
|
||||
>seq022
|
||||
MWIQVRTMDGRQTHTVDSLSRLTKVEELRRKIQELFHVEPGLQRLFYRGKQMEDGHTLFD
|
||||
YEVRLNDTIQLLVRQSLVLPHSTKERDSELSDTDSGCCLGQSESDKSSTHGEAAAETDSR
|
||||
PADEDMWDETELGLYKVNEYVDARDTNMGAWFEAQVVRVTRKAPSRDEPCSSTSRPALEE
|
||||
DVIYHVKYDDYPENGVVQMNSRDVRARARTIIKWQDLEVGQVVMLNYNPDNPKERGFWYD
|
||||
AEISRKRETRTARELYANVVLGDDSLNDCRIIFVDEVFKIERPGEGSPMVDNPMRRKSGP
|
||||
SCKHCKDDVNRLCRVCACHLCGGRQDPDKQLMCDECDMAFHIYCLDPPLSSVPSEDEWYC
|
||||
PECRNDASEVVLAGERLRESKKNAKMASATSSSQRDWGKGMACVGRTKECTIVPSNHYGP
|
||||
IPGIPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGSYSLVLAGGYEDDVDHGNFFTYTG
|
||||
SGGRDLSGNKRTAEQSCDQKLTNTNRALALNCFAPINDQEGAEAKDWRSGKPVRVVRNVK
|
||||
GGKNSKYAPAEGNRYDGIYKVVKYWPEKGKSGFLVWRYLLRRDDDEPGPWTKEGKDRIKK
|
||||
LGLTMQYPEGYLEALANREREKENSKREEEEQQEGGFASPRTGKGKWKRKSAGGGPSRAG
|
||||
SPRRTSKKTKVEPYSLTAQQSSLIREDKSNAKLWNEVLASLKDRPASGSPFQLFLSKVEE
|
||||
TFQCICCQELVFRPITTVCQHNVCKDCLDRSFRAQVFSCPACRYDLGRSYAMQVNQPLQT
|
||||
VLNQLFPGYGNGR
|
||||
>seq023
|
||||
MWIQVRTMDGKETHTVNSLSRLTKVQELRKKIEEVFHVEPQLQRLFYRGKQMEDGHTLFD
|
||||
YDVRLNDTIQLLVRQSLALPLSTKERDSELSDSDSGYGVGHSESDKSSTHGEGAAEADDK
|
||||
TVWEDTDLGLYKVNEYVDVRDNIFGAWFEAQVVQVQKRALSEDEPCSSSAVKTSEDDIMY
|
||||
HVKYDDYPEHGVDIVKAKNVRARARTVIPWENLEVGQVVMANYNVDYPRKRGFWYDVEIC
|
||||
RKRQTRTARELYGNIRLLNDSQLNNCRIMFVDEVLMIELPKERRPLIASPSQPPPALRNT
|
||||
GKSGPSCRFCKDDENKPCRKCACHVCGGREAPEKQLLCDECDMAFHLYCLKPPLTSVPPE
|
||||
PEWYCPSCRTDSSEVVQAGEKLKESKKKAKMASATSSSRRDWGKGMACVGRTTECTIVPA
|
||||
NHFGPIPGVPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGAYSLVLAGGYEDDVDNGNY
|
||||
FTYTGSGGRDLSGNKRTAGQSSDQKLTNNNRALALNCHSPINEKGAEAEDWRQGKPVRVV
|
||||
RNMKGGKHSKYAPAEGNRYDGIYKVVKYWPERGKSGFLVWRYLLRRDDTEPEPWTREGKD
|
||||
RTRQLGLTMQYPEGYLEALANKEKSRKRPAKALEQGPSSSKTGKSKQKSTGPTLSSPRAS
|
||||
KKSKLEPYTLSEQQANLIKEDKGNAKLWDDVLTSLQDGPYQIFLSKVKEAFQCICCQELV
|
||||
FRPVTTVCQHNVCKDCLDRSFRAQVFSCPACRFELDHSSPTRVNQPLQTILNQLFPGYGS
|
||||
GR
|
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
Binary file not shown.
60
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
60
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
|
@ -0,0 +1,60 @@
|
|||
# E. coli paired-end test dataset for Bisulfite-seq applications
|
||||
|
||||
The E. coli data set was generated using [Sherman](https://github.com/FelixKrueger/Sherman) as 10,000 reads of paired-end data, with average methylation levels of 80% in CpG context, and 10% in non-CG context. The files can be found in the folder: general/fastq/dna, and are called:
|
||||
|
||||
|
||||
`Ecoli_10K_methylated_R1.fastq.gz`
|
||||
|
||||
`Ecoli_10K_methylated_R2.fastq.gz`
|
||||
|
||||
```bash
|
||||
Sherman --non_dir --genome /bi/scratch/Genomes/E_coli/ --paired -n 10000 -l 100 --CG 20 --CH 90
|
||||
```
|
||||
|
||||
The data is non-directional, so it should produce roughly 25% mapping to each of the `OT`, `CTOT`, `CTOB` and `OB` strands. Thus, the data can be used for bisulfite mapping in standard (= directional), `--pbat` and `--non_directional` mode.
|
||||
|
||||
A test alignment should look roughly like this:
|
||||
|
||||
`bismark --genome /bi/scratch/Genomes/E_coli/ -1 Ecoli_10K_methylated_R1.fastq.gz -2 Ecoli_10K_methylated_R2.fastq.gz --non_dir`
|
||||
|
||||
``` csv
|
||||
Bismark report for: Ecoli_10K_methylated_R1.fastq.gz and Ecoli_10K_methylated_R2.fastq.gz (version: v0.22.3)
|
||||
Bismark was run with Bowtie 2 against the bisulfite genome of /bi/scratch/Genomes/E_coli/ with the specified options: -q --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --maxins 500
|
||||
Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)
|
||||
|
||||
Final Alignment report
|
||||
======================
|
||||
Sequence pairs analysed in total: 10000
|
||||
Number of paired-end alignments with a unique best hit: 9320
|
||||
Mapping efficiency: 93.2%
|
||||
Sequence pairs with no alignments under any condition: 0
|
||||
Sequence pairs did not map uniquely: 680
|
||||
Sequence pairs which were discarded because genomic sequence could not be extracted: 0
|
||||
|
||||
Number of sequence pairs with unique best (first) alignment came from the bowtie output:
|
||||
CT/GA/CT: 2341 ((converted) top strand)
|
||||
GA/CT/CT: 2329 (complementary to (converted) top strand)
|
||||
GA/CT/GA: 2356 (complementary to (converted) bottom strand)
|
||||
CT/GA/GA: 2294 ((converted) bottom strand)
|
||||
|
||||
Final Cytosine Methylation Report
|
||||
=================================
|
||||
Total number of C's analysed: 471997
|
||||
|
||||
Total methylated C's in CpG context: 111011
|
||||
Total methylated C's in CHG context: 11923
|
||||
Total methylated C's in CHH context: 21433
|
||||
Total methylated C's in Unknown context: 0
|
||||
|
||||
Total unmethylated C's in CpG context: 27790
|
||||
Total unmethylated C's in CHG context: 105877
|
||||
Total unmethylated C's in CHH context: 193963
|
||||
Total unmethylated C's in Unknown context: 0
|
||||
|
||||
C methylated in CpG context: 80.0%
|
||||
C methylated in CHG context: 10.1%
|
||||
C methylated in CHH context: 10.0%
|
||||
Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
|
||||
|
||||
Bismark completed in 0d 0h 0m 12s
|
||||
```
|
7
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
7
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Paired-end RNA-seq test dataset
|
||||
|
||||
The data here are 2x 76bp RNA-seq data from mouse (10,000 reads of paired-end data). The files can be found in the folder: general/fastq/rna, and are called:
|
||||
|
||||
`test_R1.fastq.gz`
|
||||
|
||||
`test_R2.fastq.gz`
|
Binary file not shown.
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue