From 4d2960c06fedfb052ce38b70c90197ea8ae8f9fc Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 18 Mar 2022 15:38:19 +0100 Subject: [PATCH 01/16] chore: add metaphlan3 module --- modules.json | 3 ++ modules/nf-core/modules/metaphlan3/main.nf | 45 ++++++++++++++++++ modules/nf-core/modules/metaphlan3/meta.yml | 52 +++++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 modules/nf-core/modules/metaphlan3/main.nf create mode 100644 modules/nf-core/modules/metaphlan3/meta.yml diff --git a/modules.json b/modules.json index dcfbd3f..9953edb 100644 --- a/modules.json +++ b/modules.json @@ -24,6 +24,9 @@ "malt/run": { "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" }, + "metaphlan3": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "multiqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/metaphlan3/main.nf b/modules/nf-core/modules/metaphlan3/main.nf new file mode 100644 index 0000000..3fc6b27 --- /dev/null +++ b/modules/nf-core/modules/metaphlan3/main.nf @@ -0,0 +1,45 @@ +process METAPHLAN3 { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' : + 'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }" + + input: + tuple val(meta), path(input) + path metaphlan_db + + output: + tuple val(meta), path("*_profile.txt") , emit: profile + tuple val(meta), path("*.biom") , emit: biom + tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam" + def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input" + def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" + + """ + metaphlan \\ + --nproc $task.cpus \\ + $input_type \\ + $input_data \\ + $args \\ + $bowtie2_out \\ + --bowtie2db ${metaphlan_db} \\ + --biom ${prefix}.biom \\ + --output_file ${prefix}_profile.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/metaphlan3/meta.yml b/modules/nf-core/modules/metaphlan3/meta.yml new file mode 100644 index 0000000..d10a27d --- /dev/null +++ b/modules/nf-core/modules/metaphlan3/meta.yml @@ -0,0 +1,52 @@ +name: metaphlan3 +description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data. +keywords: + - metagenomics + - classification + - fastq + - bam + - fasta +tools: + - metaphlan3: + description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance + homepage: https://huttenhower.sph.harvard.edu/metaphlan/ + documentation: https://github.com/biobakery/MetaPhlAn + doi: "10.7554/eLife.65088" + licence: ["MIT License"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out) + pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - profile: + type: file + description: Tab-separated output file of the predicted taxon relative abundances + pattern: "*.{txt}" + - biom: + type: file + description: General-use format for representing biological sample by observation contingency tables + pattern: "*.{biom}" + - bowtie2out: + type: file + description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPHlAn marker database ( not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3 ) + pattern: "*.{bowtie2out.txt}" + +authors: + - "@MGordon09" From 69db9206108c7926a25b45aba7401e406e36b4c3 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 18 Mar 2022 16:01:33 +0100 Subject: [PATCH 02/16] feat: include metaphlan3 in workflow --- workflows/taxprofiler.nf | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index ce89a91..a61580c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -60,7 +60,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/ include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main' include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main' include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main' - +include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -130,6 +130,7 @@ workflow TAXPROFILER { .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' + metaphlan3: it[2]['tool'] == 'metaphlan3' unknown: true } @@ -163,6 +164,13 @@ workflow TAXPROFILER { db: it[3] } + ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 + .multiMap { + it -> + reads: [ it[0] + it[2], it[1] ] + db: it[3] + } + /* MODULE: RUN PROFILING */ @@ -174,6 +182,10 @@ workflow TAXPROFILER { KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) } + if ( params.run_metaphlan3 ) { + METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) + } + /* MODULE: MultiQC */ From a425a322f294f49f0588123a555533e292bacfc7 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 18 Mar 2022 20:50:18 +0100 Subject: [PATCH 03/16] refactor: make metaphlan3 run in workflow --- nextflow.config | 3 +++ subworkflows/local/input_check.nf | 6 +++--- workflows/taxprofiler.nf | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7be36a6..9f3b103 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,6 +71,9 @@ params { // kraken2 run_kraken2 = false + + // metaphlan3 + run_metaphlan3 = false } // Load base.config by default for all pipelines diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index ec404c2..e8669b3 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -31,9 +31,9 @@ workflow INPUT_CHECK { .set { fasta } emit: - fastq // channel: [ val(meta), [ reads ] ] - nanopore // channel: [ val(meta), [ reads ] ] - fasta // channel: [ val(meta), fasta ] + fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ] + nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ] + fasta = fasta ?: [] // channel: [ val(meta), fasta ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index a61580c..99a1685 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -165,9 +165,10 @@ workflow TAXPROFILER { } ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 + .dump(tag: "input_metaphlan3") .multiMap { it -> - reads: [ it[0] + it[2], it[1] ] + reads: [it[0] + it[2], it[1][0]] db: it[3] } From fb9afa013afa33c08554a740b73bd18ee6a1d57e Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 1 Apr 2022 16:33:22 +0200 Subject: [PATCH 04/16] fix: add metaphlan parameter to schema --- nextflow_schema.json | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index fb2ca31..358ba64 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -282,6 +292,10 @@ "run_kraken2": { "type": "boolean" }, + "run_metaphlan3": { + "type": "boolean", + "description": "Enable MetaPhlAn for taxonomic profiling" + }, "shortread_clipmerge_tool": { "type": "string", "default": "fastp", @@ -306,4 +320,4 @@ "default": 15 } } -} +} \ No newline at end of file From e579f9f5f33899336ea449eea41302ec0e8657f0 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 00:28:54 +0200 Subject: [PATCH 05/16] style: apply prettier --- nextflow_schema.json | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 358ba64..1efdea8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -320,4 +310,4 @@ "default": 15 } } -} \ No newline at end of file +} From 95ed86516b445b42e1703cd2c7c020c45fabbfb3 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 13:59:10 +0200 Subject: [PATCH 06/16] chore: add citation --- CITATIONS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 8f286b0..0b35cce 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -34,6 +34,12 @@ > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. +- [Porechop](https://github.com/rrwick/Porechop) + +- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) + + > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) From b0b5936fe847fd416d4f223f9f1051425fddac73 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 14:44:27 +0200 Subject: [PATCH 07/16] chore: configure output for metaphlan3 --- conf/modules.config | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index dc8b138..5dfaa82 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -170,6 +170,15 @@ process { ] } + withName: METAPHLAN3 { + publishDir = [ + path: { "${params.outdir}/metaphlan3/${metaphlan_db.simpleName}" }, + mode: params.publish_dir_mode, + pattern: '*.{biom,txt}' + ] + ext.prefix = { "${meta.id}-${meta.run_accession}-${metaphlan_db.simpleName}" } + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, From 996b3f49126b68e273dbc94dd46302f0490a8168 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 14:44:48 +0200 Subject: [PATCH 08/16] fix: correct the input shape to metaphlan3 --- workflows/taxprofiler.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 99a1685..54ede13 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -168,7 +168,7 @@ workflow TAXPROFILER { .dump(tag: "input_metaphlan3") .multiMap { it -> - reads: [it[0] + it[2], it[1][0]] + reads: [it[0] + it[2], it[1]] db: it[3] } @@ -217,6 +217,7 @@ workflow TAXPROFILER { // TODO MALT results overwriting per database? // TODO Versions for Karken/MALT not report? + // TODO create multiQC module for metaphlan MULTIQC ( ch_multiqc_files.collect() ) From 817678d870389a5b88b99bcf4dc7290c71dd416b Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 14:45:07 +0200 Subject: [PATCH 09/16] chore: add metaphlan3 run to test profile --- conf/test.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test.config b/conf/test.config index 92a10e4..b8cd47c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,6 +26,7 @@ params { databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' run_kraken2 = true run_malt = true + run_metaphlan3 = true shortread_clipmerge = true } From 70862570e0334e0af3a039fe790158f57c88b86d Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 14:58:07 +0200 Subject: [PATCH 10/16] fix: use same naming scheme --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5dfaa82..37b1f2f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -172,11 +172,11 @@ process { withName: METAPHLAN3 { publishDir = [ - path: { "${params.outdir}/metaphlan3/${metaphlan_db.simpleName}" }, + path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${metaphlan_db.simpleName}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { From 562fa426962da46946bf9411ac3b6f68b4ae8d96 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 15:00:55 +0200 Subject: [PATCH 11/16] fix: remove duplicate porechop entry --- CITATIONS.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 0b35cce..b18f841 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -34,8 +34,6 @@ > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. -- [Porechop](https://github.com/rrwick/Porechop) - - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. From 4e22bc961084688ce9a0c6f8d1d564b70b7af912 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 15:20:36 +0200 Subject: [PATCH 12/16] fix: force UTF-8 in different locale? --- nextflow.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9f3b103..588c0cf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -158,7 +158,8 @@ if (!params.igenomes_ignore) { // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { - PYTHONNOUSERSITE = 1 + PYTHONNOUSERSITE = '1' + PYTHONUTF8 = '1' // MetaPhlAn3 may fail with a UnicodeDecodeError if the locale is not set appropriately. R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" From 41526e8a64bdbf40cc28a941e1aaa2430aa66a5d Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 15:44:39 +0200 Subject: [PATCH 13/16] try to fix locale --- .github/workflows/ci.yml | 8 ++++++++ nextflow.config | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8078e5..b8975b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,14 @@ jobs: wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ + - name: Show current locale + run: locale + + - name: Set UTF-8 enabled locale + run: | + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters diff --git a/nextflow.config b/nextflow.config index 588c0cf..cd6de9c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -159,7 +159,7 @@ if (!params.igenomes_ignore) { env { PYTHONNOUSERSITE = '1' - PYTHONUTF8 = '1' // MetaPhlAn3 may fail with a UnicodeDecodeError if the locale is not set appropriately. + // PYTHONUTF8 = '1' // MetaPhlAn3 may fail with a UnicodeDecodeError if the locale is not set appropriately. R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" From 4eab257d68a36b333bc1c5e087e6a79721b5b87e Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 17:05:46 +0200 Subject: [PATCH 14/16] style: prettify comment --- subworkflows/local/shortread_adapterremoval.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 5e005db..3598e3e 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -24,8 +24,10 @@ workflow SHORTREAD_ADAPTERREMOVAL { ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) - // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file - // has to be exported in a separate channel, and we must manually recombine when necessary + /* + * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file + * has to be exported in a separate channel and we must manually recombine when necessary. + */ if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed From be93eae64057d21c7c5da34162ac2bc51fd86922 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sat, 2 Apr 2022 21:50:13 +0200 Subject: [PATCH 15/16] fix: insert `.fastq` file extension The MetaPhlAn3 module computes the input type from the file extension without the possibility to configure an override. Since AdapterRemoval2 creates files without `.fastq` extensions, MetaPhlAn3 input was determined to be SAM. --- modules/local/ensure_fastq_extension.nf | 31 +++++ .../local/shortread_adapterremoval.nf | 126 ++++++++++++------ 2 files changed, 118 insertions(+), 39 deletions(-) create mode 100644 modules/local/ensure_fastq_extension.nf diff --git a/modules/local/ensure_fastq_extension.nf b/modules/local/ensure_fastq_extension.nf new file mode 100644 index 0000000..6de223b --- /dev/null +++ b/modules/local/ensure_fastq_extension.nf @@ -0,0 +1,31 @@ +process ENSURE_FASTQ_EXTENSION { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::bash=5.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' : + 'biocontainers/biocontainers:v1.2.0_cv2' }" + + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + + script: + if (meta.single_end) { + fastq = "${reads.baseName}.fastq.gz" + """ + ln -s '${reads}' '${fastq}' + """ + } else { + first = "${reads[0].baseName}.fastq.gz" + second = "${reads[1].baseName}.fastq.gz" + """ + ln -s '${reads[0]}' '${first}' + ln -s '${reads[1]}' '${second}' + """ + } +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 3598e3e..661062d 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -5,6 +5,11 @@ Process short raw reads with AdapterRemoval include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' +include { + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1; + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2; + ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3; +} from '../../modules/local/ensure_fastq_extension' workflow SHORTREAD_ADAPTERREMOVAL { @@ -30,59 +35,102 @@ workflow SHORTREAD_ADAPTERREMOVAL { */ if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { - ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed - .mix( - ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, - ADAPTERREMOVAL_PAIRED.out.singles_truncated, - ADAPTERREMOVAL_PAIRED.out.pair1_truncated, - ADAPTERREMOVAL_PAIRED.out.pair2_truncated - ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new.single_end = true - [ meta_new, reads ] - } - .groupTuple() + ENSURE_FASTQ_EXTENSION1( + Channel.empty().mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, + ADAPTERREMOVAL_PAIRED.out.singles_truncated, + ADAPTERREMOVAL_PAIRED.out.pair1_truncated, + ADAPTERREMOVAL_PAIRED.out.pair2_truncated + ) + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + CAT_FASTQ( + ENSURE_FASTQ_EXTENSION1.out.reads + .groupTuple() + ) + + ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads + .mix(ENSURE_FASTQ_EXTENSION2.out.reads) } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { - ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed - .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = true - [ meta_new, reads ] - } - .groupTuple(by: 0) + ENSURE_FASTQ_EXTENSION1( + Channel.empty().mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated + ) + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + CAT_FASTQ( + ENSURE_FASTQ_EXTENSION1.out.reads + .groupTuple() + ) + + ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads + .mix(ENSURE_FASTQ_EXTENSION2.out.reads) } else { - ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated - .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) - .groupTuple() - .map { meta, pair1, pair2 -> - [ meta, [ pair1, pair2 ].flatten() ] - } - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) - } + ENSURE_FASTQ_EXTENSION1( + ADAPTERREMOVAL_PAIRED.out.pair1_truncated + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) - ch_processed_reads = ch_adapterremoval_reads_prepped + ENSURE_FASTQ_EXTENSION2( + ADAPTERREMOVAL_PAIRED.out.pair2_truncated + .map { meta, reads -> + meta.single_end = true + [meta, reads] + } + ) + + ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + + ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads + .join(ENSURE_FASTQ_EXTENSION2.out.reads) + .groupTuple() + .map { meta, pair1, pair2 -> + meta.single_end = false + [ meta, [ pair1, pair2 ].flatten() ] + } + .mix(ENSURE_FASTQ_EXTENSION3.out.reads) + + } ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) - ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} ) + ch_multiqc_files = ch_multiqc_files.mix( + ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, + ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} + ) emit: - reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } + +def ensureFastQExtension(row) { + def (meta, read) = row + def filename = file(read.parent).resolve("${read.baseName}.fastq.gz") + + read.renameTo(filename.toString()) + return [meta, read] +} From f5baf910be4a7a6207c6070fef407873201d3ce4 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sun, 3 Apr 2022 00:21:09 +0200 Subject: [PATCH 16/16] Apply suggestions from code review Co-authored-by: James A. Fellows Yates --- nextflow.config | 1 - subworkflows/local/shortread_adapterremoval.nf | 7 ------- 2 files changed, 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index cd6de9c..bc66734 100644 --- a/nextflow.config +++ b/nextflow.config @@ -159,7 +159,6 @@ if (!params.igenomes_ignore) { env { PYTHONNOUSERSITE = '1' - // PYTHONUTF8 = '1' // MetaPhlAn3 may fail with a UnicodeDecodeError if the locale is not set appropriately. R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 661062d..e522a1a 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -127,10 +127,3 @@ workflow SHORTREAD_ADAPTERREMOVAL { mqc = ch_multiqc_files } -def ensureFastQExtension(row) { - def (meta, read) = row - def filename = file(read.parent).resolve("${read.baseName}.fastq.gz") - - read.renameTo(filename.toString()) - return [meta, read] -}