mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Merge branch 'master' into fastqc-unit-test
This commit is contained in:
commit
3406ed4da0
87 changed files with 74647 additions and 54 deletions
18
.github/workflows/cutadapt.yml
vendored
18
.github/workflows/cutadapt.yml
vendored
|
@ -1,8 +1,15 @@
|
||||||
name: cutadapt
|
name: cutadapt
|
||||||
on:
|
on:
|
||||||
push: {}
|
push:
|
||||||
|
paths:
|
||||||
|
- software/cutadapt/**
|
||||||
|
- .github/workflows/cutadapt.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/cutadapt/*
|
paths:
|
||||||
|
- software/cutadapt/**
|
||||||
|
- .github/workflows/cutadapt.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -12,13 +19,6 @@ jobs:
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Checkout submodules
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
|
|
||||||
git submodule sync --recursive
|
|
||||||
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
wget -qO- get.nextflow.io | bash
|
wget -qO- get.nextflow.io | bash
|
||||||
|
|
2
.github/workflows/docker.yml
vendored
2
.github/workflows/docker.yml
vendored
|
@ -14,7 +14,7 @@ jobs:
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repo
|
# Check out the repo
|
||||||
- uses: actions/checkout@v1
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
# Find the tool wrappers that changed
|
# Find the tool wrappers that changed
|
||||||
# Annoyingly, matrix can't take dynamic variables
|
# Annoyingly, matrix can't take dynamic variables
|
||||||
|
|
15
.github/workflows/fastqc.yml
vendored
15
.github/workflows/fastqc.yml
vendored
|
@ -1,9 +1,15 @@
|
||||||
name: FastQC
|
name: FastQC
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
paths: software/fastqc/**
|
paths:
|
||||||
|
- software/fastqc/**
|
||||||
|
- .github/workflows/fastqc.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/fastqc/**
|
paths:
|
||||||
|
- software/fastqc/**
|
||||||
|
- .github/workflows/fastqc.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -12,12 +18,11 @@ jobs:
|
||||||
NXF_ANSI_LOG: false
|
NXF_ANSI_LOG: false
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repository
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/checkout@v1
|
|
||||||
submodules: true
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
|
export NXF_VER="20.06.0-edge"
|
||||||
wget -qO- get.nextflow.io | bash
|
wget -qO- get.nextflow.io | bash
|
||||||
sudo mv nextflow /usr/local/bin/
|
sudo mv nextflow /usr/local/bin/
|
||||||
|
|
||||||
|
|
2
.github/workflows/lint-code.yml
vendored
2
.github/workflows/lint-code.yml
vendored
|
@ -36,7 +36,7 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Check out repository
|
- name: Check out repository
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Install NodeJS
|
- name: Install NodeJS
|
||||||
uses: actions/setup-node@v1
|
uses: actions/setup-node@v1
|
||||||
|
|
15
.github/workflows/samtools_index.yml
vendored
15
.github/workflows/samtools_index.yml
vendored
|
@ -1,8 +1,15 @@
|
||||||
name: samtools index
|
name: samtools index
|
||||||
on:
|
on:
|
||||||
push: {}
|
push:
|
||||||
|
paths:
|
||||||
|
- software/samtools/index/**
|
||||||
|
- .github/workflows/samtools_index.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/samtools/index*
|
paths:
|
||||||
|
- software/samtools/index/**
|
||||||
|
- .github/workflows/samtools_index.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -11,9 +18,7 @@ jobs:
|
||||||
NXF_ANSI_LOG: false
|
NXF_ANSI_LOG: false
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repository
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/checkout@v1
|
|
||||||
submodules: true
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
|
|
15
.github/workflows/samtools_sort.yml
vendored
15
.github/workflows/samtools_sort.yml
vendored
|
@ -1,8 +1,15 @@
|
||||||
name: samtools sort
|
name: samtools sort
|
||||||
on:
|
on:
|
||||||
push: {}
|
push:
|
||||||
|
paths:
|
||||||
|
- software/samtools/sort**
|
||||||
|
- .github/workflows/samtools_sort.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/samtools/sort*
|
paths:
|
||||||
|
- software/samtools/sort**
|
||||||
|
- .github/workflows/samtools_sort.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -11,9 +18,7 @@ jobs:
|
||||||
NXF_ANSI_LOG: false
|
NXF_ANSI_LOG: false
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repository
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/checkout@v1
|
|
||||||
submodules: true
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
|
|
18
.github/workflows/tcoffee.yml
vendored
18
.github/workflows/tcoffee.yml
vendored
|
@ -1,8 +1,15 @@
|
||||||
name: tcoffee
|
name: tcoffee
|
||||||
on:
|
on:
|
||||||
push: {}
|
push:
|
||||||
|
paths:
|
||||||
|
- software/tcoffee/**
|
||||||
|
- .github/workflows/tcoffee.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/tcoffee/*
|
paths:
|
||||||
|
- software/tcoffee/**
|
||||||
|
- .github/workflows/tcoffee.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -11,14 +18,7 @@ jobs:
|
||||||
NXF_ANSI_LOG: false
|
NXF_ANSI_LOG: false
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repository
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Checkout submodules
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
|
|
||||||
git submodule sync --recursive
|
|
||||||
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
|
|
15
.github/workflows/trim_galore.yml
vendored
15
.github/workflows/trim_galore.yml
vendored
|
@ -1,8 +1,15 @@
|
||||||
name: Trim Galore!
|
name: Trim Galore!
|
||||||
on:
|
on:
|
||||||
push: {}
|
push:
|
||||||
|
paths:
|
||||||
|
- software/trim_galore/**
|
||||||
|
- .github/workflows/trim_galore.yml
|
||||||
|
- tests
|
||||||
pull_request:
|
pull_request:
|
||||||
paths: software/trim_galore/*
|
paths:
|
||||||
|
- software/trim_galore/**
|
||||||
|
- .github/workflows/trim_galore.yml
|
||||||
|
- tests
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_ci_test:
|
run_ci_test:
|
||||||
|
@ -11,9 +18,7 @@ jobs:
|
||||||
NXF_ANSI_LOG: false
|
NXF_ANSI_LOG: false
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
# Check out the repository
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/checkout@v1
|
|
||||||
submodules: true
|
|
||||||
|
|
||||||
- name: Install Nextflow
|
- name: Install Nextflow
|
||||||
run: |
|
run: |
|
||||||
|
|
21
README.md
21
README.md
|
@ -68,23 +68,30 @@ The key words "MUST", "MUST NOT", "SHOULD", etc. are to be interpreted as descri
|
||||||
### Defining inputs, outputs and parameters
|
### Defining inputs, outputs and parameters
|
||||||
- A module file SHOULD only define inputs and outputs as parameters. Additionally,
|
- A module file SHOULD only define inputs and outputs as parameters. Additionally,
|
||||||
- it MUST define threads or resources where required for a particular process using `task.cpus`
|
- it MUST define threads or resources where required for a particular process using `task.cpus`
|
||||||
- it MUST be possible to pass additional parameters to the tool as a command line string via the `params.<MODULE>_args` parameter.
|
- ~~it MUST be possible to pass additional parameters to the tool as a command line string via the `params.<MODULE>_args` parameter.~~
|
||||||
|
- it MUST be possible to pass additional parameters as a [nextflow Map](https://www.nextflow.io/docs/latest/script.html#maps) through an additional input channel `val(options)` [Details require discussion].
|
||||||
- All NGS modules MUST accept a triplet [name, single_end, reads] as input. The single-end boolean values MUST be specified through the input channel and not inferred from the data e.g. [here](https://github.com/nf-core/tools/blob/028a9b3f9d1ad044e879a1de13d3c3a25a06b9a7/nf_core/pipeline-template/%7B%7Bcookiecutter.name_noslash%7D%7D/modules/nf-core/fastqc.nf#L13).
|
- All NGS modules MUST accept a triplet [name, single_end, reads] as input. The single-end boolean values MUST be specified through the input channel and not inferred from the data e.g. [here](https://github.com/nf-core/tools/blob/028a9b3f9d1ad044e879a1de13d3c3a25a06b9a7/nf_core/pipeline-template/%7B%7Bcookiecutter.name_noslash%7D%7D/modules/nf-core/fastqc.nf#L13).
|
||||||
- Process names MUST be all uppercase.
|
- Process names MUST be all uppercase.
|
||||||
- Each process MUST emit a file `<TOOL>.version.txt` containing a single line with the software's version in the format `v<VERSION_NUMBER>`.
|
- Each process MUST emit a file `<TOOL>.version.txt` containing a single line with the software's version in the format `v<VERSION_NUMBER>`.
|
||||||
- All outputs MUST be named using `emit`.
|
- All outputs MUST be named using `emit`.
|
||||||
|
- A Process MUST NOT contain a `when` statement.
|
||||||
|
- Optional inputs need development on the Nextflow side. In the meantime, "fake files" MAY be used to work around this issue.
|
||||||
|
|
||||||
### Atomicity
|
### Atomicity
|
||||||
- Software that can be piped together SHOULD be added to separate module files unless there is a run-time or storage advantage in implementing in this way, e.g. `bwa mem | samtools view -C -T ref.fasta` to output CRAM instead of SAM.
|
- Software that can be piped together SHOULD be added to separate module files unless there is a run-time or storage advantage in implementing in this way, e.g. `bwa mem | samtools view -C -T ref.fasta` to output CRAM instead of SAM.
|
||||||
|
|
||||||
|
### Resource requirements
|
||||||
|
- Each module MUST define a label `process_low`, `process_medium` or `process_high` to declare resource requirements. (*These flags will be ignored outside of nf-core and the pipeline developer is free to define adequate resource requirements*)
|
||||||
|
|
||||||
### Publishing results
|
### Publishing results
|
||||||
- The module MUST accept the parameters `params.out_dir` and `params.publish_dir` and MUST publish results into `${params.out_dir}/${params.publish_dir}`.
|
- The module MUST accept the parameters `params.out_dir` and `params.publish_dir` and MUST publish results into `${params.out_dir}/${params.publish_dir}`.
|
||||||
- The `publishDirMode` MUST be configurable via `params.publish_dir_mode`
|
- The `publishDirMode` MUST be configurable via `params.publish_dir_mode`
|
||||||
- The module MUST accept a parameter `params.publish_results` accepting at least
|
- The module MUST accept a parameter `params.publish_results` accepting at least
|
||||||
- `"none"`, to publish no files at all, and
|
- `"none"`, to publish no files at all,
|
||||||
- `"default"`, to publish a sensible selection of files.
|
- a glob pattern which is initialized to a sensible default value.
|
||||||
|
|
||||||
|
It MAY accept `"logs"` to publish relevant log files, or other flags, if applicable.
|
||||||
|
|
||||||
It MAY accept further options.
|
|
||||||
- To ensure consistent naming, files SHOULD be renamed according to the `$name` variable before returning them.
|
- To ensure consistent naming, files SHOULD be renamed according to the `$name` variable before returning them.
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
|
@ -93,14 +100,16 @@ The key words "MUST", "MUST NOT", "SHOULD", etc. are to be interpreted as descri
|
||||||
|
|
||||||
### Software requirements
|
### Software requirements
|
||||||
- Software requirements SHOULD be declared in a conda `environment.yml` file, including exact version numbers. Additionally, there MUST be a `Dockerfile` that containerizes the environment, or packages the software if conda is not available.
|
- Software requirements SHOULD be declared in a conda `environment.yml` file, including exact version numbers. Additionally, there MUST be a `Dockerfile` that containerizes the environment, or packages the software if conda is not available.
|
||||||
|
- Docker containers MUST be identified by their `sha256(Dockerfile + environment.yml)`.
|
||||||
|
- Each module must have its own `Dockerfile` and `environment.yml` file
|
||||||
|
- Care should be taken to maintain identical files for subcommands that use the same software. Then the hash tag will be the same and they will be implicitly re-used across subcommands.
|
||||||
|
|
||||||
### File formats
|
### File formats
|
||||||
- Wherever possible, [CRAM](https://en.wikipedia.org/wiki/CRAM_(file_format)) files SHOULD be used over BAM files.
|
- Wherever possible, [CRAM](https://en.wikipedia.org/wiki/CRAM_(file_format)) files SHOULD be used over BAM files.
|
||||||
- Wherever possible, FASTQ files SHOULD be compressed using gzip.
|
- Wherever possible, FASTQ files SHOULD be compressed using gzip.
|
||||||
|
|
||||||
### Documentation
|
### Documentation
|
||||||
|
- A module MUST be documented in the `meta.yml` file. It MUST document `params`, `input` and `output`. `input` and `output` MUST be a nested list. [Exact details need to be elaborated.]
|
||||||
Please add some documentation to the top of the module file in the form of native Nextflow comments. This has to be specified in a particular format as you will be able to see from other examples in the [`nf-core/modules/nf`](https://github.com/nf-core/modules/tree/master/nf) directory.
|
|
||||||
|
|
||||||
### Uploading to `nf-core/modules`
|
### Uploading to `nf-core/modules`
|
||||||
|
|
||||||
|
|
BIN
software/bowtie2/test/indices/E_coli/E_coli.1.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.1.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.2.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.2.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.3.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.3.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.4.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.4.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2
Normal file
Binary file not shown.
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2
Normal file
BIN
software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2
Normal file
Binary file not shown.
1
software/bowtie2/test/indices/E_coli/NC_010473.fa
Symbolic link
1
software/bowtie2/test/indices/E_coli/NC_010473.fa
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
1
software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
1
software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
1
software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
1
software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
1
software/bowtie2/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/bowtie2/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/bowtie2/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/bowtie2/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
BIN
software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam
Normal file
BIN
software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam
Normal file
Binary file not shown.
|
@ -0,0 +1,15 @@
|
||||||
|
10000 reads; of these:
|
||||||
|
10000 (100.00%) were paired; of these:
|
||||||
|
893 (8.93%) aligned concordantly 0 times
|
||||||
|
8474 (84.74%) aligned concordantly exactly 1 time
|
||||||
|
633 (6.33%) aligned concordantly >1 times
|
||||||
|
----
|
||||||
|
893 pairs aligned concordantly 0 times; of these:
|
||||||
|
815 (91.27%) aligned discordantly 1 time
|
||||||
|
----
|
||||||
|
78 pairs aligned 0 times concordantly or discordantly; of these:
|
||||||
|
156 mates make up the pairs; of these:
|
||||||
|
0 (0.00%) aligned 0 times
|
||||||
|
1 (0.64%) aligned exactly 1 time
|
||||||
|
155 (99.36%) aligned >1 times
|
||||||
|
100.00% overall alignment rate
|
BIN
software/bowtie2/test/output/test_GRCm38_bowtie2.bam
Normal file
BIN
software/bowtie2/test/output/test_GRCm38_bowtie2.bam
Normal file
Binary file not shown.
15
software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
Normal file
15
software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
9979 reads; of these:
|
||||||
|
9979 (100.00%) were paired; of these:
|
||||||
|
3584 (35.92%) aligned concordantly 0 times
|
||||||
|
3705 (37.13%) aligned concordantly exactly 1 time
|
||||||
|
2690 (26.96%) aligned concordantly >1 times
|
||||||
|
----
|
||||||
|
3584 pairs aligned concordantly 0 times; of these:
|
||||||
|
886 (24.72%) aligned discordantly 1 time
|
||||||
|
----
|
||||||
|
2698 pairs aligned 0 times concordantly or discordantly; of these:
|
||||||
|
5396 mates make up the pairs; of these:
|
||||||
|
2282 (42.29%) aligned 0 times
|
||||||
|
1467 (27.19%) aligned exactly 1 time
|
||||||
|
1647 (30.52%) aligned >1 times
|
||||||
|
88.57% overall alignment rate
|
1
software/fastq_screen/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/fastq_screen/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/fastq_screen/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/fastq_screen/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
31
software/fastq_screen/test/output/test_R1_screen.txt
Normal file
31
software/fastq_screen/test/output/test_R1_screen.txt
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
#Fastq_screen version: 0.14.0 #Aligner: bowtie2 #Reads in subset: 100000
|
||||||
|
Genome #Reads_processed #Unmapped %Unmapped #One_hit_one_genome %One_hit_one_genome #Multiple_hits_one_genome %Multiple_hits_one_genome #One_hit_multiple_genomes %One_hit_multiple_genomes Multiple_hits_multiple_genomes %Multiple_hits_multiple_genomes
|
||||||
|
Cat 10000 9171 91.71 0 0.00 0 0.00 421 4.21 408 4.08
|
||||||
|
Chicken 10000 8932 89.32 0 0.00 0 0.00 64 0.64 1004 10.04
|
||||||
|
Cow 10000 8484 84.84 0 0.00 0 0.00 294 2.94 1222 12.22
|
||||||
|
Drosophila 10000 9469 94.69 0 0.00 0 0.00 19 0.19 512 5.12
|
||||||
|
Human 10000 8367 83.67 2 0.02 3 0.03 354 3.54 1274 12.74
|
||||||
|
Mouse 10000 122 1.22 3265 32.65 869 8.69 2066 20.66 3678 36.78
|
||||||
|
Pig 10000 8459 84.59 0 0.00 0 0.00 334 3.34 1207 12.07
|
||||||
|
Rat 10000 6432 64.32 1 0.01 3 0.03 1334 13.34 2230 22.30
|
||||||
|
Zebrafish 10000 9125 91.25 0 0.00 0 0.00 41 0.41 834 8.34
|
||||||
|
Arabidopsis 10000 9497 94.97 0 0.00 0 0.00 5 0.05 498 4.98
|
||||||
|
Grape 10000 9600 96.00 0 0.00 1 0.01 82 0.82 317 3.17
|
||||||
|
Potato 10000 9460 94.60 0 0.00 0 0.00 12 0.12 528 5.28
|
||||||
|
Tomato 10000 9521 95.21 0 0.00 0 0.00 45 0.45 434 4.34
|
||||||
|
Adapters 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||||
|
Brachybacterium 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||||
|
Pseudomonas 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||||
|
Massilia_oculi 10000 9999 99.99 0 0.00 1 0.01 0 0.00 0 0.00
|
||||||
|
Ecoli 10000 9998 99.98 1 0.01 1 0.01 0 0.00 0 0.00
|
||||||
|
Lambda 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||||
|
MT 10000 7856 78.56 0 0.00 0 0.00 2034 20.34 110 1.10
|
||||||
|
PhiX 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00
|
||||||
|
rRNA 10000 9157 91.57 0 0.00 0 0.00 111 1.11 732 7.32
|
||||||
|
Wasp 10000 9473 94.73 0 0.00 0 0.00 211 2.11 316 3.16
|
||||||
|
Vectors 10000 9713 97.13 0 0.00 0 0.00 52 0.52 235 2.35
|
||||||
|
Worm 10000 9645 96.45 0 0.00 0 0.00 13 0.13 342 3.42
|
||||||
|
Yeast 10000 9507 95.07 0 0.00 0 0.00 4 0.04 489 4.89
|
||||||
|
Mycoplasma 10000 9998 99.98 0 0.00 0 0.00 0 0.00 2 0.02
|
||||||
|
|
||||||
|
%Hit_no_genomes: 0.88
|
1
software/fastqc/test/input/test_R1.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/fastqc/test/input/test_R1_val_1.fq.gz
Symbolic link
1
software/fastqc/test/input/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
1
software/fastqc/test/input/test_R2.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
1
software/fastqc/test/input/test_R2_val_2.fq.gz
Symbolic link
1
software/fastqc/test/input/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
1
software/fastqc/test/input/test_single_end.fastq.gz
Symbolic link
1
software/fastqc/test/input/test_single_end.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_single_end.fastq.gz
|
|
@ -12,24 +12,26 @@ include { FASTQC } from '../main.nf'
|
||||||
* Test if FASTQC runs with single-end data
|
* Test if FASTQC runs with single-end data
|
||||||
*/
|
*/
|
||||||
workflow test_single_end {
|
workflow test_single_end {
|
||||||
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
|
input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
|
||||||
.map {f -> [f.baseName, true, f]}
|
.map {f -> [f.name.replace(".fastq.gz", ""), true, f]}
|
||||||
FASTQC(input_files)
|
FASTQC(input_files)
|
||||||
|
|
||||||
// test that the output looks as expected
|
// test that the output looks as expected
|
||||||
FASTQC.out.html.map { name, is_single_end, html_file ->
|
FASTQC.out.html.map { name, is_single_end, html_file ->
|
||||||
html_hash = checksum.getMD5(new File("${html_file}"));
|
html_hash = checksum.getMD5(new File("${html_file}"));
|
||||||
|
|
||||||
assert name == "test_single_end.fastq"
|
assert name == "test_single_end"
|
||||||
assert is_single_end == true
|
assert is_single_end == true
|
||||||
assert html_file.getName() == "test_single_end.fastq_fastqc.html"
|
assert html_file.getName() == "test_single_end_fastqc.html"
|
||||||
assert html_hash == "ff04679b50beabdbd9e93db646f5667d"
|
// Hash seems to vary between local runs and GitHub Actions
|
||||||
|
// TODO: Might be solved when using Docker for tests?
|
||||||
|
// assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
|
||||||
}
|
}
|
||||||
FASTQC.out.zip.map { name, is_single_end, zip_file ->
|
FASTQC.out.zip.map { name, is_single_end, zip_file ->
|
||||||
// NOTE: output zip files do not have a consistent hash
|
// NOTE: output zip files do not have a consistent hash
|
||||||
assert name == "test_single_end.fastq"
|
assert name == "test_single_end"
|
||||||
assert is_single_end == true
|
assert is_single_end == true
|
||||||
assert zip_file.getName() == "test_single_end.fastq_fastqc.zip"
|
assert zip_file.getName() == "test_single_end_fastqc.zip"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +39,7 @@ workflow test_single_end {
|
||||||
* Test if FASTQC runs with paired end data
|
* Test if FASTQC runs with paired end data
|
||||||
*/
|
*/
|
||||||
workflow test_paired_end {
|
workflow test_paired_end {
|
||||||
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
|
input_files = Channel.fromFilePairs("input/test_R{1,2}.fastq.gz")
|
||||||
.map {f -> [f[0], false, f[1]]}
|
.map {f -> [f[0], false, f[1]]}
|
||||||
FASTQC(input_files)
|
FASTQC(input_files)
|
||||||
|
|
||||||
|
|
187
software/fastqc/test/output/test_R1_fastqc.html
Normal file
187
software/fastqc/test/output/test_R1_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R1_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R1_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R1_val_1_fastqc.html
Normal file
187
software/fastqc/test/output/test_R1_val_1_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R1_val_1_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R1_val_1_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R2_fastqc.html
Normal file
187
software/fastqc/test/output/test_R2_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R2_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R2_fastqc.zip
Normal file
Binary file not shown.
187
software/fastqc/test/output/test_R2_val_2_fastqc.html
Normal file
187
software/fastqc/test/output/test_R2_val_2_fastqc.html
Normal file
File diff suppressed because one or more lines are too long
BIN
software/fastqc/test/output/test_R2_val_2_fastqc.zip
Normal file
BIN
software/fastqc/test/output/test_R2_val_2_fastqc.zip
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.1.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.1.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.2.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.2.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.3.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.3.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.4.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.4.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.5.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.5.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.6.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.6.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.7.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.7.ht2
Normal file
Binary file not shown.
BIN
software/hisat2/test/indices/E_coli/E_coli.8.ht2
Normal file
BIN
software/hisat2/test/indices/E_coli/E_coli.8.ht2
Normal file
Binary file not shown.
1
software/hisat2/test/indices/E_coli/NC_010473.fa
Symbolic link
1
software/hisat2/test/indices/E_coli/NC_010473.fa
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../../tests/data/fasta/E_coli/NC_010473.fa
|
1
software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
1
software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
|
1
software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
1
software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
|
BIN
software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam
Normal file
BIN
software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam
Normal file
Binary file not shown.
|
@ -0,0 +1,6 @@
|
||||||
|
10000 reads; of these:
|
||||||
|
10000 (100.00%) were paired; of these:
|
||||||
|
823 (8.23%) aligned concordantly 0 times
|
||||||
|
8583 (85.83%) aligned concordantly exactly 1 time
|
||||||
|
594 (5.94%) aligned concordantly >1 times
|
||||||
|
91.77% overall alignment rate
|
|
@ -0,0 +1 @@
|
||||||
|
../../../../bowtie2/test/output/test_GRCm38_bowtie2_stats.txt
|
1
software/multiqc/test/input/fastq_screen/test_R1_screen.txt
Symbolic link
1
software/multiqc/test/input/fastq_screen/test_R1_screen.txt
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../fastq_screen/test/output/test_R1_screen.txt
|
1
software/multiqc/test/input/fastqc/test_R1_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R1_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../fastqc/test/output/test_R1_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../fastqc/test/output/test_R1_val_1_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R2_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R2_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../fastqc/test/output/test_R2_fastqc.zip
|
1
software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip
Symbolic link
1
software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../fastqc/test/output/test_R2_val_2_fastqc.zip
|
|
@ -0,0 +1 @@
|
||||||
|
../../../../hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt
|
|
@ -0,0 +1 @@
|
||||||
|
../../../../trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt
|
|
@ -0,0 +1 @@
|
||||||
|
../../../../trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt
|
6244
software/multiqc/test/output/multiqc_report.html
Normal file
6244
software/multiqc/test/output/multiqc_report.html
Normal file
File diff suppressed because one or more lines are too long
1
software/trim_galore/test/input/test_R1.fastq.gz
Symbolic link
1
software/trim_galore/test/input/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/trim_galore/test/input/test_R2.fastq.gz
Symbolic link
1
software/trim_galore/test/input/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
|
@ -0,0 +1,97 @@
|
||||||
|
|
||||||
|
SUMMARISING RUN PARAMETERS
|
||||||
|
==========================
|
||||||
|
Input filename: test_R1.fastq.gz
|
||||||
|
Trimming mode: paired-end
|
||||||
|
Trim Galore version: 0.6.5
|
||||||
|
Cutadapt version: 2.3
|
||||||
|
Number of cores used for trimming: 1
|
||||||
|
Quality Phred score cutoff: 20
|
||||||
|
Quality encoding type selected: ASCII+33
|
||||||
|
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||||
|
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||||
|
Maximum trimming error rate: 0.1 (default)
|
||||||
|
Minimum required adapter overlap (stringency): 1 bp
|
||||||
|
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||||
|
Output file will be GZIP compressed
|
||||||
|
|
||||||
|
|
||||||
|
This is cutadapt 2.3 with Python 3.7.3
|
||||||
|
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz
|
||||||
|
Processing reads on 1 core in single-end mode ...
|
||||||
|
Finished in 0.19 s (19 us/read; 3.12 M reads/minute).
|
||||||
|
|
||||||
|
=== Summary ===
|
||||||
|
|
||||||
|
Total reads processed: 10,000
|
||||||
|
Reads with adapters: 3,225 (32.2%)
|
||||||
|
Reads written (passing filters): 10,000 (100.0%)
|
||||||
|
|
||||||
|
Total basepairs processed: 760,000 bp
|
||||||
|
Quality-trimmed: 4,492 bp (0.6%)
|
||||||
|
Total written (filtered): 748,403 bp (98.5%)
|
||||||
|
|
||||||
|
=== Adapter 1 ===
|
||||||
|
|
||||||
|
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times.
|
||||||
|
|
||||||
|
No. of allowed errors:
|
||||||
|
0-9 bp: 0; 10-12 bp: 1
|
||||||
|
|
||||||
|
Bases preceding removed adapters:
|
||||||
|
A: 23.8%
|
||||||
|
C: 28.2%
|
||||||
|
G: 22.7%
|
||||||
|
T: 25.3%
|
||||||
|
none/other: 0.0%
|
||||||
|
|
||||||
|
Overview of removed sequences
|
||||||
|
length count expect max.err error counts
|
||||||
|
1 2170 2500.0 0 2170
|
||||||
|
2 622 625.0 0 622
|
||||||
|
3 223 156.2 0 223
|
||||||
|
4 64 39.1 0 64
|
||||||
|
5 14 9.8 0 14
|
||||||
|
6 9 2.4 0 9
|
||||||
|
7 8 0.6 0 8
|
||||||
|
8 5 0.2 0 5
|
||||||
|
9 4 0.0 0 4
|
||||||
|
10 8 0.0 1 7 1
|
||||||
|
11 3 0.0 1 3
|
||||||
|
12 4 0.0 1 4
|
||||||
|
13 6 0.0 1 6
|
||||||
|
14 5 0.0 1 4 1
|
||||||
|
15 5 0.0 1 5
|
||||||
|
16 6 0.0 1 5 1
|
||||||
|
17 3 0.0 1 3
|
||||||
|
18 3 0.0 1 3
|
||||||
|
19 1 0.0 1 1
|
||||||
|
20 3 0.0 1 3
|
||||||
|
21 7 0.0 1 7
|
||||||
|
22 7 0.0 1 7
|
||||||
|
23 3 0.0 1 3
|
||||||
|
24 6 0.0 1 6
|
||||||
|
25 4 0.0 1 4
|
||||||
|
26 2 0.0 1 2
|
||||||
|
27 4 0.0 1 4
|
||||||
|
28 1 0.0 1 1
|
||||||
|
29 3 0.0 1 3
|
||||||
|
30 4 0.0 1 4
|
||||||
|
32 3 0.0 1 3
|
||||||
|
33 2 0.0 1 1 1
|
||||||
|
34 1 0.0 1 1
|
||||||
|
35 1 0.0 1 1
|
||||||
|
40 1 0.0 1 1
|
||||||
|
42 1 0.0 1 0 1
|
||||||
|
45 1 0.0 1 0 1
|
||||||
|
49 1 0.0 1 0 1
|
||||||
|
52 1 0.0 1 0 1
|
||||||
|
56 2 0.0 1 0 2
|
||||||
|
59 1 0.0 1 0 1
|
||||||
|
67 1 0.0 1 0 1
|
||||||
|
70 2 0.0 1 0 2
|
||||||
|
|
||||||
|
RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz
|
||||||
|
=============================================
|
||||||
|
10000 sequences processed in total
|
||||||
|
|
1
software/trim_galore/test/output/test_R1_val_1.fq.gz
Symbolic link
1
software/trim_galore/test/output/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -0,0 +1,100 @@
|
||||||
|
|
||||||
|
SUMMARISING RUN PARAMETERS
|
||||||
|
==========================
|
||||||
|
Input filename: test_R2.fastq.gz
|
||||||
|
Trimming mode: paired-end
|
||||||
|
Trim Galore version: 0.6.5
|
||||||
|
Cutadapt version: 2.3
|
||||||
|
Number of cores used for trimming: 1
|
||||||
|
Quality Phred score cutoff: 20
|
||||||
|
Quality encoding type selected: ASCII+33
|
||||||
|
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||||
|
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||||
|
Maximum trimming error rate: 0.1 (default)
|
||||||
|
Minimum required adapter overlap (stringency): 1 bp
|
||||||
|
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||||
|
Output file will be GZIP compressed
|
||||||
|
|
||||||
|
|
||||||
|
This is cutadapt 2.3 with Python 3.7.3
|
||||||
|
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz
|
||||||
|
Processing reads on 1 core in single-end mode ...
|
||||||
|
Finished in 0.22 s (22 us/read; 2.71 M reads/minute).
|
||||||
|
|
||||||
|
=== Summary ===
|
||||||
|
|
||||||
|
Total reads processed: 10,000
|
||||||
|
Reads with adapters: 3,295 (33.0%)
|
||||||
|
Reads written (passing filters): 10,000 (100.0%)
|
||||||
|
|
||||||
|
Total basepairs processed: 760,000 bp
|
||||||
|
Quality-trimmed: 7,096 bp (0.9%)
|
||||||
|
Total written (filtered): 745,649 bp (98.1%)
|
||||||
|
|
||||||
|
=== Adapter 1 ===
|
||||||
|
|
||||||
|
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times.
|
||||||
|
|
||||||
|
No. of allowed errors:
|
||||||
|
0-9 bp: 0; 10-12 bp: 1
|
||||||
|
|
||||||
|
Bases preceding removed adapters:
|
||||||
|
A: 22.6%
|
||||||
|
C: 28.2%
|
||||||
|
G: 23.6%
|
||||||
|
T: 25.6%
|
||||||
|
none/other: 0.0%
|
||||||
|
|
||||||
|
Overview of removed sequences
|
||||||
|
length count expect max.err error counts
|
||||||
|
1 2213 2500.0 0 2213
|
||||||
|
2 647 625.0 0 647
|
||||||
|
3 239 156.2 0 239
|
||||||
|
4 53 39.1 0 53
|
||||||
|
5 10 9.8 0 10
|
||||||
|
6 7 2.4 0 7
|
||||||
|
7 8 0.6 0 8
|
||||||
|
8 5 0.2 0 5
|
||||||
|
9 5 0.0 0 5
|
||||||
|
10 10 0.0 1 8 2
|
||||||
|
11 2 0.0 1 2
|
||||||
|
12 4 0.0 1 4
|
||||||
|
13 7 0.0 1 7
|
||||||
|
14 3 0.0 1 3
|
||||||
|
15 4 0.0 1 4
|
||||||
|
16 5 0.0 1 5
|
||||||
|
17 3 0.0 1 3
|
||||||
|
18 5 0.0 1 4 1
|
||||||
|
19 2 0.0 1 1 1
|
||||||
|
20 3 0.0 1 3
|
||||||
|
21 7 0.0 1 7
|
||||||
|
22 6 0.0 1 6
|
||||||
|
23 3 0.0 1 3
|
||||||
|
24 7 0.0 1 7
|
||||||
|
25 4 0.0 1 4
|
||||||
|
26 2 0.0 1 2
|
||||||
|
27 4 0.0 1 4
|
||||||
|
28 1 0.0 1 1
|
||||||
|
29 3 0.0 1 3
|
||||||
|
30 4 0.0 1 4
|
||||||
|
32 3 0.0 1 3
|
||||||
|
33 1 0.0 1 1
|
||||||
|
34 1 0.0 1 1
|
||||||
|
35 2 0.0 1 1 1
|
||||||
|
40 1 0.0 1 0 1
|
||||||
|
41 1 0.0 1 1
|
||||||
|
46 1 0.0 1 0 1
|
||||||
|
48 1 0.0 1 0 1
|
||||||
|
49 2 0.0 1 0 2
|
||||||
|
56 2 0.0 1 0 2
|
||||||
|
59 1 0.0 1 0 1
|
||||||
|
70 1 0.0 1 0 1
|
||||||
|
73 2 0.0 1 0 2
|
||||||
|
|
||||||
|
RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz
|
||||||
|
=============================================
|
||||||
|
10000 sequences processed in total
|
||||||
|
|
||||||
|
Total number of sequences analysed for the sequence pair length validation: 10000
|
||||||
|
|
||||||
|
Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%)
|
1
software/trim_galore/test/output/test_R2_val_2.fq.gz
Symbolic link
1
software/trim_galore/test/output/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
66946
tests/data/fasta/E_coli/NC_010473.fa
Normal file
File diff suppressed because it is too large
Load diff
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
265
tests/data/fasta/msa/BBA0001.tfa
Normal file
|
@ -0,0 +1,265 @@
|
||||||
|
>seq001
|
||||||
|
MAGKRKRANAPDQTERRSSVRVQKVRQKALDEKARLVQERVKLLSDRKSEICVDDTELHE
|
||||||
|
KEEENVDGSPKRRSPPKLTAMQKGKQKLSVSLNGKDVNLEPHLKVTKCLRLFNKQYLLCV
|
||||||
|
QAKLSRPDLKGVTEMIKAKAILYPRKIIGDLPGIDVGHRFFSRAEMCAVGFHNHWLNGID
|
||||||
|
YMSMEYEKEYSNYKLPLAVSIVMSGQYEDDLDNADTVTYTGQGGHNLTGNKRQIKDQLLE
|
||||||
|
RGNLALKHCCEYNVPVRVTRGHNCKSSYTKRVYTYDGLYKVEKFWAQKGVSGFTVYKYRL
|
||||||
|
KRLEGQPELTTDQVNFVAGRIPTSTSEIEGLVCEDISGGLEFKGIPATNRVDDSPVSPTS
|
||||||
|
GFTYIKSLIIEPNVIIPKSSTGCNCRGSCTDSKKCACAKLNGGNFPYVDLNDGRLIESRD
|
||||||
|
VVFECGPHCGCGPKCVNRTSQKRLRFNLEVFRSAKKGWAVRSWEYIPAGSPVCEYIGVVR
|
||||||
|
RTADVDTISDNEYIFEIDCQQTMQGLGGRQRRLRDVAVPMNNGVSQSSEDENAPEFCIDA
|
||||||
|
GSTGNFARFINHSCEPNLFVQCVLSSHQDIRLARVVLFAADNISPMQELTYDYGYALDSV
|
||||||
|
HGPDGKVKQLACYCGALNCRKRLY
|
||||||
|
>seq002
|
||||||
|
MPRHFGAVPGVVPGMAFVNRQELRDAGVHLPTQAGISGSASEGADSIVLSGGYEDDRDEG
|
||||||
|
DVILYTGEGGRDPLTGHQVKPQQLVRGNLALAISHRDGLPLRVTRGHRHSSQFSPQSGYQ
|
||||||
|
YAGLYRVDDHWREVGRSGFLIWRFRLTRLENQDAHHAGADPQHPDSQHPERRPTLVQRIV
|
||||||
|
RDTATARAVKALYDHRCQVCGERLETPAGAYAEAAHIRPLGAPHHGPDVAGNILCLCPNH
|
||||||
|
HVLFDFGAFSVGDDLRLLGLPGRLHVHPQHAVDREHLAYHRRHYALQAGLEWGCSVPLT
|
||||||
|
>seq003
|
||||||
|
MGVMENLMVHTEISKVKSQSNGEVEKRGVSVLENGGVCKLDRMSGLKFKRRKVFAVRDFP
|
||||||
|
PGCGSRAMEVKIACENGNVVEDVKVVESLVKEEESLGQRDASENVSDIRMAEPVEVQPLR
|
||||||
|
ICLPGGDVVRDLSVTAGDECSNSEQIVAGSGVSSSSGTENIVRDIVVYADESSLGMDNLD
|
||||||
|
QTQPLEIEMSDVAVAKPRLVAGRKKAKKGIACHSSLKVVSREFGEGSRKKKSKKNLYWRD
|
||||||
|
RESLDSPEQLRILGVGTSSGSSSGDSSRNKVKETLRLFHGVCRKILQEDEAKPEDQRRKG
|
||||||
|
KGLRIDFEASTILKRNGKFLNSGVHILGEVPGVEVGDEFQYRMELNILGIHKPSQAGIDY
|
||||||
|
MKYGKAKVATSIVASGGYDDHLDNSDVLTYTGQGGNVMQVKKKGEELKEPEDQKLITGNL
|
||||||
|
ALATSIEKQTPVRVIRGKHKSTHDKSKGGNYVYDGLYLVEKYWQQVGSHGMNVFKFQLRR
|
||||||
|
IPGQPELSWVEVKKSKSKYREGLCKLDISEGKEQSPISAVNEIDDEKPPLFTYTVKLIYP
|
||||||
|
DWCRPVPPKSCCCTTRCTEAEARVCACVEKNGGEIPYNFDGAIVGAKPTIYECGPLCKCP
|
||||||
|
SSCYLRVTQHGIKLPLEIFKTKSRGWGVRCLKSIPIGSFICEYVGELLEDSEAERRIGND
|
||||||
|
EYLFDIGNRYDNSLAQGMSELMLGTQAGRSMAEGDESSGFTIDAASKGNVGRFINHSCSP
|
||||||
|
NLYAQNVLYDHEDSRIPHVMFFAQDNIPPLQELCYDYNYALDQVRDSKGNIKQKPCFCGA
|
||||||
|
AVCRRRLY
|
||||||
|
>seq004
|
||||||
|
MSTLLPFPDLNLMPDSQSSTAGTTAGDTVVTGKLEVKSEPIEEWQTPPSSTSDQSANTDL
|
||||||
|
IAEFIRISELFRSAFKPLQVKGLDGVSVYGLDSGAIVAVPEKENRELIEPPPGFKDNRVS
|
||||||
|
TVVVSPKFERPRELARIAILGHEQRKELRQVMKRTRMTYESLRIHLMAESMKNHVLGQGR
|
||||||
|
RRRSDMAAAYIMRDRGLWLNYDKHIVGPVTGVEVGDIFFYRMELCVLGLHGQTQAGIDCL
|
||||||
|
TAERSATGEPIATSIVVSGGYEDDEDTGDVLVYTGHGGQDHQHKQCDNQRLVGGNLGMER
|
||||||
|
SMHYGIEVRVIRGIKYENSISSKVYVYDGLYKIVDWWFAVGKSGFGVFKFRLVRIEGQPM
|
||||||
|
MGSAVMRFAQTLRNKPSMVRPTGYVSFDLSNKKENVPVFLYNDVDGDQEPRHYEYIAKAV
|
||||||
|
FPPGIFGQGGISRTGCECKLSCTDDCLCARKNGGEFAYDDNGHLLKGKHVVFECGEFCTC
|
||||||
|
GPSCKSRVTQKGLRNRLEVFRSKETGWGVRTLDLIEAGAFICEYAGVVVTRLQAEILSMN
|
||||||
|
GDVMVYPGRFTDQWRNWGDLSQVYPDFVRPNYPSLPPLDFSMDVSRMRNVACYISHSKEP
|
||||||
|
NVMVQFVLHDHNHLMFPRVMLFALENISPLAELSLDYGLADEVNGKLAICN
|
||||||
|
>seq005
|
||||||
|
MVHSESSILSSLRGGDGGGIPCSKDELAINGSYTDPMGRRKSKRFKVAAESEFSPDFGSI
|
||||||
|
TRQLRSRRMQKEFTVETYETRNVSDVCVLSSQADVELIPGEIVAERDSFKSVDCNDMSVG
|
||||||
|
LTEGAESLGVNMQEPMKDRNMPENTSEQNMVEVHPPSISLPEEDMMGSVCRKSITGTKEL
|
||||||
|
HGRTISVGRDLSPNMGSKFSKNGKTAKRSISVEEENLVLEKSDSGDHLGPSPEVLELEKS
|
||||||
|
EVWIITDKGVVMPSPVKPSEKRNGDYGEGSMRKNSERVALDKKRLASKFRLSNGGLPSCS
|
||||||
|
SSGDSARYKVKETMRLFHETCKKIMQEEEARPRKRDGGNFKVVCEASKILKSKGKNLYSG
|
||||||
|
TQIIGTVPGVEVGDEFQYRMELNLLGIHRPSQSGIDYMKDDGGELVATSIVSSGGYNDVL
|
||||||
|
DNSDVLIYTGQGGNVGKKKNNEPPKDQQLVTGNLALKNSINKKNPVRVIRGIKNTTLQSS
|
||||||
|
VVAKNYVYDGLYLVEEYWEETGSHGKLVFKFKLRRIPGQPELPWKEVAKSKKSEFRDGLC
|
||||||
|
NVDITEGKETLPICAVNNLDDEKPPPFIYTAKMIYPDWCRPIPPKSCGCTNGCSKSKNCA
|
||||||
|
CIVKNGGKIPYYDGAIVEIKPLVYECGPHCKCPPSCNMRVSQHGIKIKLEIFKTESRGWG
|
||||||
|
VRSLESIPIGSFICEYAGELLEDKQAESLTGKDEYLFDLGDEDDPFTINAAQKGNIGRFI
|
||||||
|
NHSCSPNLYAQDVLYDHEEIRIPHIMFFALDNIPPLQELSYDYNYKIDQVYDSNGNIKKK
|
||||||
|
FCYCGSAECSGRLY
|
||||||
|
>seq006
|
||||||
|
MERNGGHYTDKTRVLDIKPLRTLRPVFPSGNQAPPFVCAPPFGPFPPGFSSFYPFSSSQA
|
||||||
|
NQHTPDLNQAQYPPQHQQPQNPPPVYQQQPPQHASEPSLVTPLRSFRSPDVSNGNAELEG
|
||||||
|
STVKRRIPKKRPISRPENMNFESGINVADRENGNRELVLSVLMRFDALRRRFAQLEDAKE
|
||||||
|
AVSGIIKRPDLKSGSTCMGRGVRTNTKKRPGIVPGVEIGDVFFFRFEMCLVGLHSPSMAG
|
||||||
|
IDYLVVKGETEEEPIATSIVSSGYYDNDEGNPDVLIYTGQGGNADKDKQSSDQKLERGNL
|
||||||
|
ALEKSLRRDSAVRVIRGLKEASHNAKIYIYDGLYEIKESWVEKGKSGHNTFKYKLVRAPG
|
||||||
|
QPPAFASWTAIQKWKTGVPSRQGLILPDMTSGVESIPVSLVNEVDTDNGPAYFTYSTTVK
|
||||||
|
YSESFKLMQPSFGCDCANLCKPGNLDCHCIRKNGGDFPYTGNGILVSRKPMIYECSPSCP
|
||||||
|
CSTCKNKVTQMGVKVRLEVFKTANRGWGLRSWDAIRAGSFICIYVGEAKDKSKVQQTMAN
|
||||||
|
DDYTFDTTNVYNPFKWNYEPGLADEDACEEMSEESEIPLPLIISAKNVGNVARFMNHSCS
|
||||||
|
PNVFWQPVSYENNSQLFVHVAFFAISHIPPMTELTYDYGVSRPSGTQNGNPLYGKRKCFC
|
||||||
|
GSAYCRGSFG
|
||||||
|
>seq007
|
||||||
|
MQGVPGFNTVPNPNHYDKSIVLDIKPLRSLKPVFPNGNQGPPFVGCPPFGPSSSEYSSFF
|
||||||
|
PFGAQQPTHDTPDLNQTQNTPIPSFVPPLRSYRTPTKTNGPSSSSGTKRGVGRPKGTTSV
|
||||||
|
KKKEKKTVANEPNLDVQVVKKFSSDFDSGISAAEREDGNAYLVSSVLMRFDAVRRRLSQV
|
||||||
|
EFTKSATSKAAGTLMSNGVRTNMKKRVGTVPGIEVGDIFFSRIEMCLVGLHMQTMAGIDY
|
||||||
|
IISKAGSDEESLATSIVSSGRYEGEAQDPESLIYSGQGGNADKNRQASDQKLERGNLALE
|
||||||
|
NSLRKGNGVRVVRGEEDAASKTGKIYIYDGLYSISESWVEKGKSGCNTFKYKLVRQPGQP
|
||||||
|
PAFGFWKSVQKWKEGLTTRPGLILPDLTSGAESKPVSLVNDVDEDKGPAYFTYTSSLKYS
|
||||||
|
ETFKLTQPVIGCSCSGSCSPGNHNCSCIRKNDGDLPYLNGVILVSRRPVIYECGPTCPCH
|
||||||
|
ASCKNRVIQTGLKSRLEVFKTRNRGWGLRSWDSLRAGSFICEYAGEVKDNGNLRGNQEED
|
||||||
|
AYVFDTSRVFNSFKWNYEPELVDEDPSTEVPEEFNLPSPLLISAKKFGNVARFMNHSCSP
|
||||||
|
NVFWQPVIREGNGESVIHIAFFAMRHIPPMAELTYDYGISPTSEARDESLLHGQRTCLCG
|
||||||
|
SEQCRGSFG
|
||||||
|
>seq008
|
||||||
|
MGSSHIPLDPSLNPSPSLIPKLEPVTESTQNLAFQLPNTNPQALISSAVSDFNEATDFSS
|
||||||
|
DYNTVAESARSAFAQRLQRHDDVAVLDSLTGAIVPVEENPEPEPNPYSTSDSSPSVATQR
|
||||||
|
PRPQPRSSELVRITDVGPESERQFREHVRKTRMIYDSLRMFLMMEEAKRNGVGGRRARAD
|
||||||
|
GKAGKAGSMMRDCMLWMNRDKRIVGSIPGVQVGDIFFFRFELCVMGLHGHPQSGIDFLTG
|
||||||
|
SLSSNGEPIATSVIVSGGYEDDDDQGDVIMYTGQGGQDRLGRQAEHQRLEGGNLAMERSM
|
||||||
|
YYGIEVRVIRGLKYENEVSSRVYVYDGLFRIVDSWFDVGKSGFGVFKYRLERIEGQAEMG
|
||||||
|
SSVLKFARTLKTNPLSVRPRGYINFDISNGKENVPVYLFNDIDSDQEPLYYEYLAQTSFP
|
||||||
|
PGLFVQQSGNASGCDCVNGCGSGCLCEAKNSGEIAYDYNGTLIRQKPLIHECGSACQCPP
|
||||||
|
SCRNRVTQKGLRNRLEVFRSLETGWGVRSLDVLHAGAFICEYAGVALTREQANILTMNGD
|
||||||
|
TLVYPARFSSARWEDWGDLSQVLADFERPSYPDIPPVDFAMDVSKMRNVACYISHSTDPN
|
||||||
|
VIVQFVLHDHNSLMFPRVMLFAAENIPPMTELSLDYGVVDDWNAKLAICN
|
||||||
|
>seq009
|
||||||
|
MDKSIPIKAIPVACVRPDLVDDVTKNTSTIPTMVSPVLTNMPSATSPLLMVPPLRTIWPS
|
||||||
|
NKEWYDGDAGPSSTGPIKREASDNTNDTAHNTFAPPPEMVIPLITIRPSDDSSNYSCDAG
|
||||||
|
AGPSTGPVKRGRGRPKGSKNSTPTEPKKPKVYDPNSLKVTSRGNFDSEITEAETETGNQE
|
||||||
|
IVDSVMMRFDAVRRRLCQINHPEDILTTASGNCTKMGVKTNTRRRIGAVPGIHVGDIFYY
|
||||||
|
WGEMCLVGLHKSNYGGIDFFTAAESAVEGHAAMCVVTAGQYDGETEGLDTLIYSGQGGTD
|
||||||
|
VYGNARDQEMKGGNLALEASVSKGNDVRVVRGVIHPHENNQKIYIYDGMYLVSKFWTVTG
|
||||||
|
KSGFKEFRFKLVRKPNQPPAYAIWKTVENLRNHDLIDSRQGFILEDLSFGAELLRVPLVN
|
||||||
|
EVDEDDKTIPEDFDYIPSQCHSGMMTHEFHFDRQSLGCQNCRHQPCMHQNCTCVQRNGDL
|
||||||
|
LPYHNNILVCRKPLIYECGGSCPCPDHCPTRLVQTGLKLHLEVFKTRNCGWGLRSWDPIR
|
||||||
|
AGTFICEFAGLRKTKEEVEEDDDYLFDTSKIYQRFRWNYEPELLLEDSWEQVSEFINLPT
|
||||||
|
QVLISAKEKGNVGRFMNHSCSPNVFWQPIEYENRGDVYLLIGLFAMKHIPPMTELTYDYG
|
||||||
|
VSCVERSEEDEGFLVCPYLSSSLWPSSSEIHFLINSKGRAWYDKIYRKLASQGNVSSGLD
|
||||||
|
SVKDEPEKLREEQMEGDGFKEKLSDSVLIDEKLEEYSDCDRTATTSRSHTDPVSSQSTHQ
|
||||||
|
TPESFRTPITCDDDTFVSVSGISRDVSNLIPFATETPASPVQEKMANTRSFSNNSVKGNQ
|
||||||
|
DEFFIEDFDVGPMDTIDLYDMTFREDPSDFDDNLLYAMRDRTKQLRSFKRKIMDAIKSKR
|
||||||
|
RREKEYEQLAIWFGDADMGCDLVNDKEQSTTSIDSKSSQTNVPVVSEDSEWEIL
|
||||||
|
>seq010
|
||||||
|
MMMTQRISPSNKRRRVSFVRDFPQFSVKDESDIGGDDVATIKENLDGKEDSNCVGVAYRD
|
||||||
|
HHRPKEESFDSIMKKAGFNVANGNLGNGKFPPSKRNVPLPCEGKVQPLSVEEGIKLMAYE
|
||||||
|
SQRRRCFGKPLVSTKVVQKHRYSPAKKKLSNATALRVRHSPMKKLSNASRLRANAHRPTQ
|
||||||
|
HKDERRSGVLSVIQRNRLSKDLTPRQKVQEVLRIFTLVFDELDRNKAARRGGSETAKSRI
|
||||||
|
DYQTWTILREMGMQVNSQKRIGSVPGIKVGDKIQFKAALSVIGLHFGIMSGIDYMYKGNK
|
||||||
|
EVATSIVSSEGNDYGDRFINDVMIYCGQGGNMRSKDHKAIKDQKLVGGNLALANSIKEKT
|
||||||
|
PVRVIRGERRLDNRGKDYVYDGLYRVEKYWEERGPQGNILFKFKLRRTCQPYVDF
|
||||||
|
>seq011
|
||||||
|
MSQKRSLVFAIRDFPPGCGTHIDVSSSLNHPAEKAFKHPRTGDVSGENLSFAEAKPEGTC
|
||||||
|
LKRESADQDHIFAAPEHNAKREPAGQDHVVAATTVAYATSSHRQKVEIGNSDCDPTPREK
|
||||||
|
VLEVLSLFKQVYNQLDRDKKARRGGDFLDATSRIDLKTLTVLEKMGKQVNTEKRIGSVPG
|
||||||
|
INIGDVFQYKTELRVVGLHSKPMCGIDYIKLGDDRITTSIVASEGYGYNDTYNSGVMVYT
|
||||||
|
GEGGNVINKQKKTEDQKLVKGNLALATSMRQKSQVRVIRGEERLDRKGKRYVYDGLYMVE
|
||||||
|
EYWVERDVRGKSVYKFKLCRIPGQLPLT
|
||||||
|
>seq012
|
||||||
|
MCLVGLHRNTAGGIDSLLAKESGVDGPAATSVVTSGKYDNETEDLETLIYSGHGGKPCDQ
|
||||||
|
VLQRGNRALEASVRRRNEVRVIRGELYNNEKVYIYDGLYLVSDCWQVTGKSGFKEYRFKL
|
||||||
|
LRKPGQPPGYAIWKLVENLRNHELIDPRQGFILGDLSFGEEGLRVPLVNEVDEEDKTIPD
|
||||||
|
DFDYIRSQCYSGMTNDVNVDSQSLVQSYIHQNCTCILKNCGQLPYHDNILVCRKPLIYEC
|
||||||
|
GGSCPTRMVETGLKLHLEVFKTSNCGWGLRSWDPIRAGTFICEFTGVSKTKEEVEEDDDY
|
||||||
|
LFDTSRIYHSFRWNYEPELLCEDACEQVSEDANLPTQVLISAKEKGNVGRFMNHNCWPNV
|
||||||
|
FWQPIEYDDNNGHIYVRIGLFAMKHIPPMTELTYDYGISCVEKTGEDEVIYKGKKICLCG
|
||||||
|
SVKCRGSFG
|
||||||
|
>seq013
|
||||||
|
MGLVGLHSGTIDMEFIGVEDHGDEEGKQIAVSVISSGKNADKTEDPDSLIFTGFGGTDMY
|
||||||
|
HGQPCNQKLERLNIPLEAAFRKKSIVRVVRCMKDEKRTNGNIYIYDGTYMITNRWEEEGQ
|
||||||
|
NGFIVFKFKLVREPDQKPAFGIWKSIQNWRNGLSIRPGLILEDLSNGAENLKVCLVNEVD
|
||||||
|
KENGPALFRYVTSLIHEVINNIPSMVDRCACGRRSCGSKHVFREKLSVSSSLVISAKKSG
|
||||||
|
NVARFMNHSCSPNVFWQSIAREQNGLWCLYIGFFAMKHIPPLTELRYDYGKSRGGGKKMC
|
||||||
|
LCRTKKCCGSFG
|
||||||
|
>seq014
|
||||||
|
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||||
|
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||||
|
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||||
|
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||||
|
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRSRGRH
|
||||||
|
FANEDQEFEDLNEALRVSCEMGYPVRVVRSYKDRYSAYAPKEGVRYDGVYRIEKCWRKAR
|
||||||
|
FPVCRYLFVRCDNEPAPWNSDESGDRPRPLPNIPELETASDLFERKESPSWDFDEAEGRW
|
||||||
|
RWMKPPPANHEQRERMKMAMTCLLLFVLIILVGSSSILYQY
|
||||||
|
>seq015
|
||||||
|
MTRVNQLPCDCVSTAEESLTSGTCITPTHVTSLSSPLDRSGDVDPLPVSDESGGSKADES
|
||||||
|
MTDADETKKRKRILSGDCEADENNKSDGEIASLNDGVDAFTAICEDLNCSLCNQLPDRPV
|
||||||
|
TILCGHNFCLKCFDKWIDQGNQICATCRSTIPDKMAANPRVNSSLVSVIRYVKVAKTAGV
|
||||||
|
GTANFFPFTSNQDGPENAFRTKRAKIGEENAARIYVTVPFDHFGPIPAEHDPVRNQGVLV
|
||||||
|
GESWENRVECRQWGVHLPHVSCIAGQEDYGAQSVVISGGYKDDEDHGEWFLYTGRRSYKD
|
||||||
|
RYSAYAPKEGVRYDGVYRIEKCWRKARFPDSFKVCRYLFVRCDNEPAPWNSDESGDRPRP
|
||||||
|
LPNIPELETASDLFERKESPSWDFDEAEGRWRWMKPPPANHEQRERMKMAMTCLLLFVLI
|
||||||
|
ILVGSSSILYQY
|
||||||
|
>seq016
|
||||||
|
MAEQPRINSALVSVIRMAKVSKNANSAVSAAAYHYIRNDDRPDKAFTTERAKRAGKANAS
|
||||||
|
SGQIFVTIPPDHFGPILAENDPKRSIGVLVGDTWEDRLECRQWGAHFPHVAGIAGQSTHG
|
||||||
|
AQSVALSGGYVDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSSDQKFEKLNAALRISCLKG
|
||||||
|
YPVRVVRSHKEKRSSYAPEAGVRYDGVYRIEKCWRKISVQGKFKVCRYLFVRCDNEPAPW
|
||||||
|
TSDIYGDRPRPLPKVDELKGATDISERKGTPSWDFDEKEGWKWVKPPPISRKPNLSGDPA
|
||||||
|
TDKEIRRVARRAQMSVTERLLKEFGCSICKQVMKEPLTTPCAHNFCKLCLVGTYGSQSSM
|
||||||
|
RERSRGGRTLRAQKIVKKCPSCPTDICDFLENPQINREMMDLIESLQRKAVEEGDTKTSS
|
||||||
|
DVSNGAESSGDDGNNEALEKGEDDSSLKDDGSLKDDGKVVKAVVVIKEEDLQPKKSKGED
|
||||||
|
EKEQGDKKMDSADVVDIAVEKKQATKRASEKAEKKQARKRKGDAVATNDGKRMKTGGDAM
|
||||||
|
ETAAEEDAPLSGGTPVKRNSRKSSEVDAKGGGGSPVVSSPRRVTRSNAKASGEADGSPAT
|
||||||
|
RTRRATRAEA
|
||||||
|
>seq017
|
||||||
|
MTPATQYPCDPEGVCMRCKSMPPPEESLTCGTCVTPWHVSCLLSPPETLSATLQWLCPDC
|
||||||
|
SGETNPLPVSGVAAGYGSVGSDLVAAIHSIEADETLSAEEKAKKKQQLLSGKGVVDEDDE
|
||||||
|
EEKKKTSKGKKPIDVLSHFECSFCMQSLQKPVSVRVLFALALMLVWFLESTPCGHNACLK
|
||||||
|
CFLKWMGQGHRSCGTCRSVIPESMVTNPRINLSIVSAIRLARVSEKADARTSKVVHYVDN
|
||||||
|
EDRPDKAFTTERAKKTGNANASSGKIFVTIPRDHFGPIPAENDPVRNQGLLVGESWKGRL
|
||||||
|
ACRQWGAHFPHVSGIAGQASYGAQSVVLAGGYDDDEDHGEWFLYTGRTNTVQAFDQVFLN
|
||||||
|
FNEALRLSCKLGYPVRVVRSTKDKRSPYAPQGGLLRYDGVYRIEKCWRIVGIQMCRFLFV
|
||||||
|
RCDNEPAPWTSDEHGDRPRPLPNVPELNMATDLFERKESPSWDFDEGEDRWRWMKPPPAS
|
||||||
|
KKAVKNVLDPEERKLLREAIKSANPNTMRARLLKEFKCQICQKVMTNPVTTPCAHNFCKA
|
||||||
|
CLESKFAGTALVRERGSGGRKLRSQKSVMKCPCCPTDIAEFVQNPQVNREVAEVIEKLKK
|
||||||
|
QEEEENAKSLDEGQCSGTSHEEEDDEQPKKRIKLDTDAEVSATVVESDMK
|
||||||
|
>seq018
|
||||||
|
MARDIQLPCDGDGVCMRCKSNPPPEESLTCGTCVTPWHVSCLSSPPKTLASTLQWHCPDC
|
||||||
|
SGEIDPLPVSGGATGFESAGSDLVAAIRAIEADESLSTEEKAKMRQRLLSGKGVEEDDEE
|
||||||
|
EKRKKKGKGKNPNLDVLSALGDNLMCSFCMQLPERPVTKPCGHNACLKCFEKWMGQGKRT
|
||||||
|
CGKCRSIIPEKMAKNPRINSSLVAAIRLAKVSKSAAATTSKVFHFISNQDRPDKAFTTER
|
||||||
|
AKKTGKANAASGKIYVTIPPDHFGPIPAENDPVRNQGLLVGESWEDRLECRQWGAHFPHV
|
||||||
|
AGIAGQSTYGAQSVALSGGYKDDEDHGEWFLYTGSGGRDLSGNKRTNKEQSFDQKFEKSN
|
||||||
|
AALKLSCKLGYPVRVVRSHKEKRSAYAPEEGVRYDGVYRIEKCWRKVGVQVCRYLFVRCD
|
||||||
|
NEPAPWTSDENGDRPRPIPNIPELNMATDLFERKETPSWDFDEGEGCWKWMKPPPASKKS
|
||||||
|
VNVLAPEERKNLRKAIKAAHSNTMRARLLKEFKCQICQQVLTLPVTTPCAHNFCKACLEA
|
||||||
|
KFAGKTLVRERSTGGRTLRSRKNVLNCPCCPTDISDFLQNPQVNREVAEVIEKLKTQEED
|
||||||
|
TAELEDEDEGECSGTTPEEDSEQPKKRIKLDTDATVSATIR
|
||||||
|
>seq019
|
||||||
|
MAIQTQLPCDGDGVCMRCQVTPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||||
|
GVVVPSAAPGTGISGPESSGSVLVAAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||||
|
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGKFF
|
||||||
|
VTIPRDHFGPIPAANDVTRNQGVLVGESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVA
|
||||||
|
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRV
|
||||||
|
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEH
|
||||||
|
GDRPRPLPDVPELENATDLFVRKESPSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKR
|
||||||
|
AKKGNNAMKARLLKEFSCQICRKVLSLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVR
|
||||||
|
KLRAKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEAEVAESSNISEEEEEE
|
||||||
|
SEPPTKKIKMDNNSVGDTSLSA
|
||||||
|
>seq020
|
||||||
|
MAIQTQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVSCLLPESLASSTGDWECPDCS
|
||||||
|
GVVVPSAAPGTGISGPESSGSVLVTAIRAIQADVTLTEAEKAKKRQRLMSGGGDDGVDDE
|
||||||
|
EKKKLEIFCSICIQLPERPVTTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSGEATAAKVHHIIRNQDRPDKAFTTERAVKTGKANAASGVLV
|
||||||
|
GESWEDRQECRQWGVHFPHVAGIAGQAAVGAQSVALSGGYDDDEDHGEWFLYTGSGGRDL
|
||||||
|
SGNKRVNKIQSSDQAFKNMNEALRLSCKMGYPVRVVRSWKEKRSAYAPAEGVRYDGVYRI
|
||||||
|
EKCWSNVGVQGLHKMCRYLFVRCDNEPAPWTSDEHGDRPRPLPDVPELENATDLFVRKES
|
||||||
|
PSWGFDEAEGRWKWMKSPPVSRMALDTEERKKNKRAKKGNNAMKARLLKEFSCQICRKVL
|
||||||
|
SLPVTTPCAHNFCKACLEAKFAGITQLRDRSNGVRKLRAKKNIMTCPCCTTDLSEFLQNP
|
||||||
|
QVNREMMEIIENFKKSEEEAEVAESSNISEEEGEEESEPPTKKIKMDKNSVGGTSLSA
|
||||||
|
>seq021
|
||||||
|
MAIETQLPCDGDGVCMRCQVNPPSEETLTCGTCVTPWHVPCLLPESLASSTGEWECPDCS
|
||||||
|
GVVVPSAAPGTGNARPESSGSVLVAAIRAIQADETLTEAEKAKKRQKLMSGGGDDGVDEE
|
||||||
|
EKKKLEIFCSICIQLPERPITTPCGHNFCLKCFEKWAVGQGKLTCMICRSKIPRHVAKNP
|
||||||
|
RINLALVSAIRLANVTKCSVEATAAKVHHIIRNQDRPEKAFTTERAVKTGKANAASGKFF
|
||||||
|
VTIPRDHFGPIPAENDVTRKQGVLVGESWEDRQECRQWGAHFPHIAGIAGQSAVGAQSVA
|
||||||
|
LSGGYDDDEDHGEWFLYTGSGGRDLSGNKRINKKQSSDQAFKNMNESLRLSCKMGYPVRV
|
||||||
|
VRSWKEKRSAYAPAEGVRYDGVYRIEKCWSNVGVQGSFKVCRYLFVRCDNEPAPWTSDEH
|
||||||
|
GDRPRPLPNVPELETAADLFVRKESPSWDFDEAEGRWKWMKSPPVSRMALDPEERKKNKR
|
||||||
|
AKNTMKARLLKEFSCQICREVLSLPVTTPCAHNFCKACLEAKFAGITQLRERSNGGRKLR
|
||||||
|
AKKNIMTCPCCTTDLSEFLQNPQVNREMMEIIENFKKSEEEADASISEEEEEESEPPTKK
|
||||||
|
IKMDNNSVGGSGTSLSA
|
||||||
|
>seq022
|
||||||
|
MWIQVRTMDGRQTHTVDSLSRLTKVEELRRKIQELFHVEPGLQRLFYRGKQMEDGHTLFD
|
||||||
|
YEVRLNDTIQLLVRQSLVLPHSTKERDSELSDTDSGCCLGQSESDKSSTHGEAAAETDSR
|
||||||
|
PADEDMWDETELGLYKVNEYVDARDTNMGAWFEAQVVRVTRKAPSRDEPCSSTSRPALEE
|
||||||
|
DVIYHVKYDDYPENGVVQMNSRDVRARARTIIKWQDLEVGQVVMLNYNPDNPKERGFWYD
|
||||||
|
AEISRKRETRTARELYANVVLGDDSLNDCRIIFVDEVFKIERPGEGSPMVDNPMRRKSGP
|
||||||
|
SCKHCKDDVNRLCRVCACHLCGGRQDPDKQLMCDECDMAFHIYCLDPPLSSVPSEDEWYC
|
||||||
|
PECRNDASEVVLAGERLRESKKNAKMASATSSSQRDWGKGMACVGRTKECTIVPSNHYGP
|
||||||
|
IPGIPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGSYSLVLAGGYEDDVDHGNFFTYTG
|
||||||
|
SGGRDLSGNKRTAEQSCDQKLTNTNRALALNCFAPINDQEGAEAKDWRSGKPVRVVRNVK
|
||||||
|
GGKNSKYAPAEGNRYDGIYKVVKYWPEKGKSGFLVWRYLLRRDDDEPGPWTKEGKDRIKK
|
||||||
|
LGLTMQYPEGYLEALANREREKENSKREEEEQQEGGFASPRTGKGKWKRKSAGGGPSRAG
|
||||||
|
SPRRTSKKTKVEPYSLTAQQSSLIREDKSNAKLWNEVLASLKDRPASGSPFQLFLSKVEE
|
||||||
|
TFQCICCQELVFRPITTVCQHNVCKDCLDRSFRAQVFSCPACRYDLGRSYAMQVNQPLQT
|
||||||
|
VLNQLFPGYGNGR
|
||||||
|
>seq023
|
||||||
|
MWIQVRTMDGKETHTVNSLSRLTKVQELRKKIEEVFHVEPQLQRLFYRGKQMEDGHTLFD
|
||||||
|
YDVRLNDTIQLLVRQSLALPLSTKERDSELSDSDSGYGVGHSESDKSSTHGEGAAEADDK
|
||||||
|
TVWEDTDLGLYKVNEYVDVRDNIFGAWFEAQVVQVQKRALSEDEPCSSSAVKTSEDDIMY
|
||||||
|
HVKYDDYPEHGVDIVKAKNVRARARTVIPWENLEVGQVVMANYNVDYPRKRGFWYDVEIC
|
||||||
|
RKRQTRTARELYGNIRLLNDSQLNNCRIMFVDEVLMIELPKERRPLIASPSQPPPALRNT
|
||||||
|
GKSGPSCRFCKDDENKPCRKCACHVCGGREAPEKQLLCDECDMAFHLYCLKPPLTSVPPE
|
||||||
|
PEWYCPSCRTDSSEVVQAGEKLKESKKKAKMASATSSSRRDWGKGMACVGRTTECTIVPA
|
||||||
|
NHFGPIPGVPVGTMWRFRVQVSESGVHRPHVAGIHGRSNDGAYSLVLAGGYEDDVDNGNY
|
||||||
|
FTYTGSGGRDLSGNKRTAGQSSDQKLTNNNRALALNCHSPINEKGAEAEDWRQGKPVRVV
|
||||||
|
RNMKGGKHSKYAPAEGNRYDGIYKVVKYWPERGKSGFLVWRYLLRRDDTEPEPWTREGKD
|
||||||
|
RTRQLGLTMQYPEGYLEALANKEKSRKRPAKALEQGPSSSKTGKSKQKSTGPTLSSPRAS
|
||||||
|
KKSKLEPYTLSEQQANLIKEDKGNAKLWDDVLTSLQDGPYQIFLSKVKEAFQCICCQELV
|
||||||
|
FRPVTTVCQHNVCKDCLDRSFRAQVFSCPACRFELDHSSPTRVNQPLQTILNQLFPGYGS
|
||||||
|
GR
|
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
BIN
tests/data/fastq/dna/SRR396636_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
BIN
tests/data/fastq/methylated_dna/Ecoli_10K_methylated_R2.fastq.gz
Normal file
Binary file not shown.
60
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
60
tests/data/fastq/methylated_dna/bisulfite-seq_test_data.md
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
# E. coli paired-end test dataset for Bisulfite-seq applications
|
||||||
|
|
||||||
|
The E. coli data set was generated using [Sherman](https://github.com/FelixKrueger/Sherman) as 10,000 reads of paired-end data, with average methylation levels of 80% in CpG context, and 10% in non-CG context. The files can be found in the folder `tests/data/fastq/methylated_dna`, and are called:
|
||||||
|
|
||||||
|
|
||||||
|
`Ecoli_10K_methylated_R1.fastq.gz`
|
||||||
|
|
||||||
|
`Ecoli_10K_methylated_R2.fastq.gz`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
Sherman --non_dir --genome /bi/scratch/Genomes/E_coli/ --paired -n 10000 -l 100 --CG 20 --CH 90
|
||||||
|
```
|
||||||
|
|
||||||
|
The data is non-directional, so it should produce roughly 25% mapping to each of the `OT`, `CTOT`, `CTOB` and `OB` strands. Thus, the data can be used for bisulfite mapping in standard (= directional), `--pbat` and `--non_directional` mode.
|
||||||
|
|
||||||
|
A test alignment should look roughly like this:
|
||||||
|
|
||||||
|
`bismark --genome /bi/scratch/Genomes/E_coli/ -1 Ecoli_10K_methylated_R1.fastq.gz -2 Ecoli_10K_methylated_R2.fastq.gz --non_dir`
|
||||||
|
|
||||||
|
``` csv
|
||||||
|
Bismark report for: Ecoli_10K_methylated_R1.fastq.gz and Ecoli_10K_methylated_R2.fastq.gz (version: v0.22.3)
|
||||||
|
Bismark was run with Bowtie 2 against the bisulfite genome of /bi/scratch/Genomes/E_coli/ with the specified options: -q --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --maxins 500
|
||||||
|
Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)
|
||||||
|
|
||||||
|
Final Alignment report
|
||||||
|
======================
|
||||||
|
Sequence pairs analysed in total: 10000
|
||||||
|
Number of paired-end alignments with a unique best hit: 9320
|
||||||
|
Mapping efficiency: 93.2%
|
||||||
|
Sequence pairs with no alignments under any condition: 0
|
||||||
|
Sequence pairs did not map uniquely: 680
|
||||||
|
Sequence pairs which were discarded because genomic sequence could not be extracted: 0
|
||||||
|
|
||||||
|
Number of sequence pairs with unique best (first) alignment came from the bowtie output:
|
||||||
|
CT/GA/CT: 2341 ((converted) top strand)
|
||||||
|
GA/CT/CT: 2329 (complementary to (converted) top strand)
|
||||||
|
GA/CT/GA: 2356 (complementary to (converted) bottom strand)
|
||||||
|
CT/GA/GA: 2294 ((converted) bottom strand)
|
||||||
|
|
||||||
|
Final Cytosine Methylation Report
|
||||||
|
=================================
|
||||||
|
Total number of C's analysed: 471997
|
||||||
|
|
||||||
|
Total methylated C's in CpG context: 111011
|
||||||
|
Total methylated C's in CHG context: 11923
|
||||||
|
Total methylated C's in CHH context: 21433
|
||||||
|
Total methylated C's in Unknown context: 0
|
||||||
|
|
||||||
|
Total unmethylated C's in CpG context: 27790
|
||||||
|
Total unmethylated C's in CHG context: 105877
|
||||||
|
Total unmethylated C's in CHH context: 193963
|
||||||
|
Total unmethylated C's in Unknown context: 0
|
||||||
|
|
||||||
|
C methylated in CpG context: 80.0%
|
||||||
|
C methylated in CHG context: 10.1%
|
||||||
|
C methylated in CHH context: 10.0%
|
||||||
|
Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
|
||||||
|
|
||||||
|
Bismark completed in 0d 0h 0m 12s
|
||||||
|
```
|
7
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
7
tests/data/fastq/rna/paired_end_RNAseq_test_data.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Paired-end RNA-seq test dataset
|
||||||
|
|
||||||
|
The data here are 2x 76bp RNA-seq data from mouse (10,000 reads of paired-end data). The files can be found in the folder `tests/data/fastq/rna`, and are called:
|
||||||
|
|
||||||
|
`test_R1.fastq.gz`
|
||||||
|
|
||||||
|
`test_R2.fastq.gz`
|
Binary file not shown.
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R1_val_1.fq.gz
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
BIN
tests/data/fastq/rna/test_R2_val_2.fq.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue