Refactor adapterremoval (#1491)

* refactor: insert .fastq file extensions

* style: insert whitespace

* refactor: create paired output

* refactor: rename settings from log

Requested by @jfy133

* tests: correct expected output

* fix: remove settings option due to default

* chore: rename output patterns

* refactor: omit paired files in single-end

* refactor: rename output to settings
This commit is contained in:
Moritz E. Beber 2022-04-04 21:40:35 +02:00 committed by GitHub
parent 13cc32399c
commit 879d42c5e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 31 deletions

View file

@ -12,14 +12,13 @@ process ADAPTERREMOVAL {
path(adapterlist)
output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.settings') , emit: settings
path "versions.yml" , emit: versions
when:
@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
$adapterlist \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
# Give an AdapterRemoval output the conventional FASTQ extension:
# <name>.gz is renamed to <name>.fastq.gz. A missing file is skipped
# silently, since the outputs are optional and not all of them exist
# after every run.
ensure_fastq() {
    if [ -f "\${1}" ]; then
        # Strip the .gz suffix with POSIX suffix removal instead of a
        # negative-length substring, which requires bash 4.2 or newer.
        mv "\${1}" "\${1%.gz}.fastq.gz"
    fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
$adapterlist \\
--basename ${prefix} \\
--threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
# Give an AdapterRemoval output the conventional FASTQ extension:
# <name>.gz is renamed to <name>.fastq.gz. A missing file is skipped
# silently, since the outputs are optional and not all of them exist
# after every run.
ensure_fastq() {
    if [ -f "\${1}" ]; then
        # Strip the .gz suffix with POSIX suffix removal instead of a
        # negative-length substring, which requires bash 4.2 or newer.
        mv "\${1}" "\${1%.gz}.fastq.gz"
    fi
}
ensure_fastq '${prefix}.truncated.gz'
ensure_fastq '${prefix}.discarded.gz'
ensure_fastq '${prefix}.pair1.truncated.gz'
ensure_fastq '${prefix}.pair2.truncated.gz'
ensure_fastq '${prefix}.collapsed.gz'
ensure_fastq '${prefix}.collapsed.truncated.gz'
ensure_fastq '${prefix}.paired.gz'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")

View file

@ -43,43 +43,43 @@ output:
Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds).
pattern: "*.truncated.gz"
pattern: "*.truncated.fastq.gz"
- discarded:
type: file
description: |
Adapter trimmed FastQ files of reads that did not pass filtering
thresholds.
pattern: "*.discarded.gz"
pattern: "*.discarded.fastq.gz"
- pair1_truncated:
type: file
description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz"
pattern: "*.pair1.truncated.fastq.gz"
- pair2_truncated:
type: file
description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz"
pattern: "*.pair2.truncated.fastq.gz"
- collapsed:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective mate (R1 with R2) but were not trimmed.
pattern: "*.collapsed.gz"
pattern: "*.collapsed.fastq.gz"
- collapsed_truncated:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective mate (R1 with R2) and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz"
pattern: "*.collapsed.truncated.fastq.gz"
- settings:
type: file
description: AdapterRemoval settings file (run parameters and trimming statistics)
pattern: "*.log"
pattern: "*.settings"
- versions:
type: file
description: File containing software versions

View file

@ -3,10 +3,10 @@
tags:
- adapterremoval
files:
- path: output/adapterremoval/test.discarded.gz
- path: output/adapterremoval/test.log
- path: output/adapterremoval/test.discarded.fastq.gz
- path: output/adapterremoval/test.settings
md5sum: 2fd3d5d703b63ba33a83021fccf25f77
- path: output/adapterremoval/test.truncated.gz
- path: output/adapterremoval/test.truncated.fastq.gz
md5sum: 62139afee94defad5b83bdd0b8475a1f
- path: output/adapterremoval/versions.yml
md5sum: ac5b46719719b7ee62739530b80869fc
@ -16,12 +16,12 @@
tags:
- adapterremoval
files:
- path: output/adapterremoval/test.discarded.gz
- path: output/adapterremoval/test.log
- path: output/adapterremoval/test.discarded.fastq.gz
- path: output/adapterremoval/test.settings
md5sum: b8a451d3981b327f3fdb44f40ba2d6d1
- path: output/adapterremoval/test.pair1.truncated.gz
- path: output/adapterremoval/test.pair1.truncated.fastq.gz
md5sum: 294a6277f0139bd597e57c6fa31f39c7
- path: output/adapterremoval/test.pair2.truncated.gz
- path: output/adapterremoval/test.pair2.truncated.fastq.gz
md5sum: de7b38e2c881bced8671acb1ab452d78
- path: output/adapterremoval/versions.yml
md5sum: fa621c887897da5a379c719399c17db7
@ -31,15 +31,15 @@
tags:
- adapterremoval
files:
- path: output/adapterremoval/test.collapsed.gz
- path: output/adapterremoval/test.collapsed.fastq.gz
md5sum: ff956de3532599a56c3efe5369f0953f
- path: output/adapterremoval/test.collapsed.truncated.gz
- path: output/adapterremoval/test.discarded.gz
- path: output/adapterremoval/test.log
- path: output/adapterremoval/test.collapsed.truncated.fastq.gz
- path: output/adapterremoval/test.discarded.fastq.gz
- path: output/adapterremoval/test.settings
md5sum: 7f0b2328152226e46101a535cce718b3
- path: output/adapterremoval/test.pair1.truncated.gz
- path: output/adapterremoval/test.pair1.truncated.fastq.gz
md5sum: 683be19bc1c83008944b6b719bfa34e1
- path: output/adapterremoval/test.pair2.truncated.gz
- path: output/adapterremoval/test.pair2.truncated.fastq.gz
md5sum: e6548fe061f3ef86368b26da930174d0
- path: output/adapterremoval/versions.yml
md5sum: 78f589bb313c8da0147ca8ce77d7f3bf