From be93eae64057d21c7c5da34162ac2bc51fd86922 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 21:50:13 +0200 Subject: [PATCH] fix: insert `.fastq` file extension The MetaPhlAn3 module computes the input type from the file extension without the possibility to configure an override. Since AdapterRemoval2 creates files without `.fastq` extensions, MetaPhlAn3 input was determined to be SAM. --- modules/local/ensure_fastq_extension.nf | 31 +++++ .../local/shortread_adapterremoval.nf | 126 ++++++++++++------ 2 files changed, 118 insertions(+), 39 deletions(-) create mode 100644 modules/local/ensure_fastq_extension.nf diff --git a/modules/local/ensure_fastq_extension.nf b/modules/local/ensure_fastq_extension.nf new file mode 100644 index 0000000..6de223b --- /dev/null +++ b/modules/local/ensure_fastq_extension.nf @@ -0,0 +1,31 @@ +process ENSURE_FASTQ_EXTENSION { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::bash=5.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' : + 'biocontainers/biocontainers:v1.2.0_cv2' }" + + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + + script: + if (meta.single_end) { + fastq = "${reads.baseName}.fastq.gz" + """ + ln -s '${reads}' '${fastq}' + """ + } else { + first = "${reads[0].baseName}.fastq.gz" + second = "${reads[1].baseName}.fastq.gz" + """ + ln -s '${reads[0]}' '${first}' + ln -s '${reads[1]}' '${second}' + """ + } +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 3598e3e..661062d 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -5,6 +5,11 @@ Process short raw reads with AdapterRemoval include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' +include { + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1; + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2; + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3; +} from '../../modules/local/ensure_fastq_extension' workflow SHORTREAD_ADAPTERREMOVAL { @@ -30,59 +35,102 @@ workflow SHORTREAD_ADAPTERREMOVAL { */ if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { - ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed - .mix( - ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, - ADAPTERREMOVAL_PAIRED.out.singles_truncated, - ADAPTERREMOVAL_PAIRED.out.pair1_truncated, - ADAPTERREMOVAL_PAIRED.out.pair2_truncated - ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new.single_end = true - [ meta_new, reads ] - } - .groupTuple() + ENSURE_FASTQ_EXTENSION1( + Channel.empty().mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, + ADAPTERREMOVAL_PAIRED.out.singles_truncated, + ADAPTERREMOVAL_PAIRED.out.pair1_truncated, + ADAPTERREMOVAL_PAIRED.out.pair2_truncated + ) + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + CAT_FASTQ( + ENSURE_FASTQ_EXTENSION1.out.reads + .groupTuple() + ) + + ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads + .mix(ENSURE_FASTQ_EXTENSION2.out.reads) } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { - ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed - .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = true - [ meta_new, reads ] - } - .groupTuple(by: 0) + ENSURE_FASTQ_EXTENSION1( + Channel.empty().mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated + ) + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + CAT_FASTQ( + ENSURE_FASTQ_EXTENSION1.out.reads + .groupTuple() + ) + + ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads + .mix(ENSURE_FASTQ_EXTENSION2.out.reads) } else { - ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated - .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) - .groupTuple() - .map { meta, pair1, pair2 -> - [ meta, [ pair1, pair2 ].flatten() ] - } - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) - } + ENSURE_FASTQ_EXTENSION1( + ADAPTERREMOVAL_PAIRED.out.pair1_truncated + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_processed_reads = ch_adapterremoval_reads_prepped + ENSURE_FASTQ_EXTENSION2( + ADAPTERREMOVAL_PAIRED.out.pair2_truncated + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) + + ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads + .join(ENSURE_FASTQ_EXTENSION2.out.reads) + .groupTuple() + .map { meta, pair1, pair2 -> + meta.single_end = false + [ meta, [ pair1, pair2 ].flatten() ] + } + .mix(ENSURE_FASTQ_EXTENSION3.out.reads) + + } ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) - ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} ) + ch_multiqc_files = ch_multiqc_files.mix( + ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, + ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} + ) emit: - reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } + +def ensureFastQExtension(row) { + def (meta, read) = row + def filename = file(read.parent).resolve("${read.baseName}.fastq.gz") + + read.renameTo(filename.toString()) + return [meta, read] +}