From 879d42c5e28661fe0a5e744c9e2c515868f9e08a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 4 Apr 2022 21:40:35 +0200 Subject: [PATCH] Refactor adapterremoval (#1491) * refactor: insert .fastq file extensions * style: insert whitespace * refactor: create paired output * refactor: rename settings from log Requested by @jfy133 * tests: correct expected output * fix: remove settings option due to default * chore: rename output patterns * refactor: omit paired files in single-end * refactor: rename output to settings --- modules/adapterremoval/main.nf | 44 ++++++++++++++++++++------- modules/adapterremoval/meta.yml | 14 ++++----- tests/modules/adapterremoval/test.yml | 26 ++++++++-------- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/modules/adapterremoval/main.nf b/modules/adapterremoval/main.nf index 9d16b9c9..0e17c055 100644 --- a/modules/adapterremoval/main.nf +++ b/modules/adapterremoval/main.nf @@ -12,15 +12,14 @@ process ADAPTERREMOVAL { path(adapterlist) output: - tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated - tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded - tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated - tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated - tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed - tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated - tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated + tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded + tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , 
optional: true, emit: paired_truncated + tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed + tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated + tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved + tuple val(meta), path('*.settings') , emit: settings + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +37,19 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads ${task.cpus} \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1%.gz}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") @@ -56,10 +64,24 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads $task.cpus \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1%.gz}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + ensure_fastq '${prefix}.pair1.truncated.gz' + ensure_fastq '${prefix}.pair2.truncated.gz' + ensure_fastq '${prefix}.collapsed.gz' + ensure_fastq '${prefix}.collapsed.truncated.gz' + ensure_fastq '${prefix}.paired.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. 
//g") diff --git a/modules/adapterremoval/meta.yml b/modules/adapterremoval/meta.yml index 5faad043..77273f60 100644 --- a/modules/adapterremoval/meta.yml +++ b/modules/adapterremoval/meta.yml @@ -43,43 +43,43 @@ output: Adapter trimmed FastQ files of either single-end reads, or singleton 'orphaned' reads from merging of paired-end data (i.e., one of the pair was lost due to filtering thresholds). - pattern: "*.truncated.gz" + pattern: "*.truncated.fastq.gz" - discarded: type: file description: | Adapter trimmed FastQ files of reads that did not pass filtering thresholds. - pattern: "*.discarded.gz" + pattern: "*.discarded.fastq.gz" - pair1_truncated: type: file description: | Adapter trimmed R1 FastQ files of paired-end reads that did not merge with their respective R2 pair due to long templates. The respective pair is stored in 'pair2_truncated'. - pattern: "*.pair1.truncated.gz" + pattern: "*.pair1.truncated.fastq.gz" - pair2_truncated: type: file description: | Adapter trimmed R2 FastQ files of paired-end reads that did not merge with their respective R1 pair due to long templates. The respective pair is stored in 'pair1_truncated'. - pattern: "*.pair2.truncated.gz" + pattern: "*.pair2.truncated.fastq.gz" - collapsed: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair but were not trimmed. - pattern: "*.collapsed.gz" + pattern: "*.collapsed.fastq.gz" - collapsed_truncated: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair and were trimmed of adapter due to sufficient overlap. 
- pattern: "*.collapsed.truncated.gz" + pattern: "*.collapsed.truncated.fastq.gz" - log: type: file description: AdapterRemoval log file - pattern: "*.log" + pattern: "*.settings" - versions: type: file description: File containing software versions diff --git a/tests/modules/adapterremoval/test.yml b/tests/modules/adapterremoval/test.yml index f6adfba3..e660da76 100644 --- a/tests/modules/adapterremoval/test.yml +++ b/tests/modules/adapterremoval/test.yml @@ -3,10 +3,10 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.discarded.gz - - path: output/adapterremoval/test.log + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: 2fd3d5d703b63ba33a83021fccf25f77 - - path: output/adapterremoval/test.truncated.gz + - path: output/adapterremoval/test.truncated.fastq.gz md5sum: 62139afee94defad5b83bdd0b8475a1f - path: output/adapterremoval/versions.yml md5sum: ac5b46719719b7ee62739530b80869fc @@ -16,12 +16,12 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.discarded.gz - - path: output/adapterremoval/test.log + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: b8a451d3981b327f3fdb44f40ba2d6d1 - - path: output/adapterremoval/test.pair1.truncated.gz + - path: output/adapterremoval/test.pair1.truncated.fastq.gz md5sum: 294a6277f0139bd597e57c6fa31f39c7 - - path: output/adapterremoval/test.pair2.truncated.gz + - path: output/adapterremoval/test.pair2.truncated.fastq.gz md5sum: de7b38e2c881bced8671acb1ab452d78 - path: output/adapterremoval/versions.yml md5sum: fa621c887897da5a379c719399c17db7 @@ -31,15 +31,15 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.collapsed.gz + - path: output/adapterremoval/test.collapsed.fastq.gz md5sum: ff956de3532599a56c3efe5369f0953f - - path: output/adapterremoval/test.collapsed.truncated.gz - - path: output/adapterremoval/test.discarded.gz - - path: 
output/adapterremoval/test.log + - path: output/adapterremoval/test.collapsed.truncated.fastq.gz + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: 7f0b2328152226e46101a535cce718b3 - - path: output/adapterremoval/test.pair1.truncated.gz + - path: output/adapterremoval/test.pair1.truncated.fastq.gz md5sum: 683be19bc1c83008944b6b719bfa34e1 - - path: output/adapterremoval/test.pair2.truncated.gz + - path: output/adapterremoval/test.pair2.truncated.fastq.gz md5sum: e6548fe061f3ef86368b26da930174d0 - path: output/adapterremoval/versions.yml md5sum: 78f589bb313c8da0147ca8ce77d7f3bf