diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 91b0aa4..c0ecece 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. Useful when you only wish to test a single profiler without having to opt-out of all the others' + config_profile_description = "Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. Useful when you only wish to test a single profiler without having to 'opt-out' of all the others" // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/modules.json b/modules.json index 33c0c10..3b8cab2 100644 --- a/modules.json +++ b/modules.json @@ -3,104 +3,141 @@ "homePage": "https://github.com/nf-core/taxprofiler", "repos": { "nf-core/modules": { - "adapterremoval": { - "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a" - }, - "bbmap/bbduk": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bowtie2/align": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bowtie2/build": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "cat/fastq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "centrifuge/centrifuge": { - "git_sha": "d2726fcf75063960f06b36d2229a4c0966614108" - }, - "centrifuge/kreport": { - "git_sha": "734d0db6079a4aa43b6509b207e5d6feb35d4838" - }, - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "diamond/blastx": { - "git_sha": "3531824af826c16cd252bc5aa82ae169b244ebaa" - }, - "fastp": { - "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "filtlong": { - "git_sha": "957cb9b83668075f4af101fc99502908cca487e3" - }, - "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "kaiju/kaiju": { - "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" - }, - "kaiju/kaiju2krona": { - "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f" - }, - "kaiju/kaiju2table": { - "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" - }, - "kraken2/kraken2": { - "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" - }, - "krakentools/kreport2krona": { - "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422" - }, - "krona/ktimporttaxonomy": { - "git_sha": "0e9fd9370ad1845870b8a9c63fcc47d999a1739e" - }, - "krona/ktimporttext": { - "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2" - }, - "malt/run": { - "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5" - }, - "megan/rma2info": { - "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" - }, - "metaphlan3": { - "git_sha": "ed4dd1a928ebf4308efb720de878045f7773f8e2" - }, - "minimap2/align": { - "git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b" - }, - "minimap2/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "motus/merge": { - "git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49" - }, - "motus/profile": { - "git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "porechop": { - "git_sha": "b78e19b9dae3671db2c7d4346fe04452c1debfab" - }, - "prinseqplusplus": { - "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b" - }, - "samtools/bam2fq": { - "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" - }, - "samtools/view": { - "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" - }, - "untar": { - "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "adapterremoval": { + "branch": "master", + "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a" + }, + "bbmap/bbduk": { + "branch": "master", + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "cat/fastq": { + "branch": "master", + "git_sha": "b034029b59b1198075da8019074bc02051a6100e" + }, + "centrifuge/centrifuge": { + "branch": "master", + "git_sha": "d2726fcf75063960f06b36d2229a4c0966614108" + }, + "centrifuge/kreport": { + "branch": "master", + "git_sha": "734d0db6079a4aa43b6509b207e5d6feb35d4838" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" + }, + "diamond/blastx": { + "branch": "master", + "git_sha": "3531824af826c16cd252bc5aa82ae169b244ebaa" + }, + "fastp": { + "branch": "master", + "git_sha": "7e8ad566883449e7939062b5e2bcf53fc1e0002f", + "patch": "modules/nf-core/modules/fastp/fastp.diff" + }, + "fastqc": { + "branch": "master", + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + }, + "filtlong": { + "branch": "master", + "git_sha": "957cb9b83668075f4af101fc99502908cca487e3" + }, + "gunzip": { + "branch": "master", + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" + }, + "kaiju/kaiju": { + "branch": "master", + "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" + }, + "kaiju/kaiju2krona": { + "branch": "master", + "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f" + }, + "kaiju/kaiju2table": { + "branch": "master", + "git_sha": "538dbac98ba9c8f799536cd5a617195501439457" + }, + "kraken2/kraken2": { + "branch": "master", + "git_sha": "409a308ba46284d8ebb48c2c1befd6f6433db3f7" + }, + "krakentools/kreport2krona": { + "branch": "master", + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "krona/ktimporttaxonomy": { + "branch": "master", + "git_sha": "0e9fd9370ad1845870b8a9c63fcc47d999a1739e" + }, + "krona/ktimporttext": { + "branch": "master", + "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2" + }, + "malt/run": { + "branch": "master", + "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5" + }, + "megan/rma2info": { + "branch": "master", + "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" + }, + "metaphlan3/metaphlan3": { + "branch": "master", + "git_sha": "940d7fe9d63962e0e2ba0987e2893fb0aff832e3" + }, + "minimap2/align": { + "branch": "master", + "git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b" + }, + "minimap2/index": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "motus/merge": { + "branch": "master", + "git_sha": "b02e648c221e1da17cb589eefe297e61ec9e9c49" + }, + "motus/profile": { + "branch": "master", + "git_sha": "b6ed584443ad68ac41e6975994139454a4f23c18" + }, + "multiqc": { + "branch": "master", + "git_sha": "16eee433b87b303bda650131ac5a0b1ad725e166" + }, + "porechop": { + "branch": "master", + "git_sha": "b78e19b9dae3671db2c7d4346fe04452c1debfab" + }, + "prinseqplusplus": { + "branch": "master", + "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b" + }, + "samtools/bam2fq": { + "branch": "master", + "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" + }, + "samtools/view": { + "branch": "master", + "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" + }, + "untar": { + "branch": "master", + "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" + } } } } diff --git a/modules/nf-core/modules/bbmap/bbduk/meta.yml b/modules/nf-core/modules/bbmap/bbduk/meta.yml index 6abd3d9..d79b9d0 100644 --- a/modules/nf-core/modules/bbmap/bbduk/meta.yml +++ b/modules/nf-core/modules/bbmap/bbduk/meta.yml @@ -4,6 +4,7 @@ keywords: - trimming - adapter trimming - quality trimming + - fastq tools: - bbmap: description: BBMap is a short read aligner, as well as various other bioinformatic tools. diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf index 7e8a965..c74e376 100644 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -1,77 +1,71 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label 'process_high' + label "process_high" - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : - 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) + container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? + "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : + "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" input: tuple val(meta), path(reads) path index val save_unaligned + val sort_bam output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.log') , emit: log - tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" if (meta.single_end) { - def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -U $reads \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" } else { - def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - - - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then - mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz - fi - if [ -f ${prefix}.unmapped.fastq.2.gz ]; then - mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" } + + def samtools_command = sort_bam ? 'sort' : 'view' + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml index f80421e..42ba0f9 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -2,7 +2,9 @@ name: bowtie2_align description: Align reads to a reference genome using bowtie2 keywords: - align + - map - fasta + - fastq - genome - reference tools: @@ -29,6 +31,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" output: - bam: type: file diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/modules/cat/fastq/main.nf index bf0877c..d275f19 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/modules/cat/fastq/main.nf @@ -4,8 +4,8 @@ process CAT_FASTQ { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads.collect{ it.toString() } + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 327d510..34b50b9 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index 787bdb7..d139039 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,10 +1,9 @@ #!/usr/bin/env python +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): html = [ @@ -59,12 +58,11 @@ versions_by_module = {} for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + assert versions_by_module[module] == process_versions, ( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) except KeyError: versions_by_module[module] = process_versions diff --git a/modules/nf-core/modules/fastp/fastp.diff b/modules/nf-core/modules/fastp/fastp.diff new file mode 100644 index 0000000..a05d486 --- /dev/null +++ b/modules/nf-core/modules/fastp/fastp.diff @@ -0,0 +1,33 @@ +Changes in module 'nf-core/modules/fastp' +--- modules/nf-core/modules/fastp/main.nf ++++ modules/nf-core/modules/fastp/main.nf +@@ -33,9 +33,8 @@ + def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz +- cat ${prefix}.fastq.gz \\ +- | fastp \\ +- --stdin \\ ++ ++ fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ +@@ -45,6 +44,7 @@ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz ++ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") +@@ -69,6 +69,7 @@ + --detect_adapter_for_pe \\ + $args \\ + 2> ${prefix}.fastp.log ++ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf index 5c9e3b8..abbdbad 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/modules/fastp/main.nf @@ -13,7 +13,7 @@ process FASTP { val save_merged output: - tuple val(meta), path('*.trim.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads tuple val(meta), path('*.json') , emit: json tuple val(meta), path('*.html') , emit: html tuple val(meta), path('*.log') , emit: log @@ -28,19 +28,23 @@ process FASTP { def args = task.ext.args ?: '' // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" + // Use single ended for interleaved. Add --interleaved_in in config. if (meta.single_end) { def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ + --stdout \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.trim.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") @@ -50,13 +54,13 @@ process FASTP { def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.trim.fastq.gz \\ - --out2 ${prefix}_2.trim.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ @@ -65,6 +69,7 @@ process FASTP { --detect_adapter_for_pe \\ $args \\ 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml index 3274e41..598c336 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/modules/fastp/meta.yml @@ -15,7 +15,7 @@ input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. [ id:'test', single_end:false ] - reads: type: file @@ -38,7 +38,7 @@ output: - reads: type: file description: The trimmed/modified/unmerged fastq reads - pattern: "*trim.fastq.gz" + pattern: "*fastp.fastq.gz" - json: type: file description: Results in JSON format diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index ed6b8c5..0573036 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 61bf1af..7036704 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf index d400023..43a1679 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/modules/kraken2/kraken2/main.nf @@ -14,11 +14,11 @@ process KRAKEN2_KRAKEN2 { val save_reads_assignment output: - tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq - tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq - tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment - tuple val(meta), path('*report.txt') , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads.txt') , optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,9 +29,9 @@ process KRAKEN2_KRAKEN2 { def paired = meta.single_end ? "" : "--paired" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" - def classified_command = save_output_fastqs ? "--classified-out ${classified}" : "" - def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" - def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : "" """ @@ -40,9 +40,9 @@ process KRAKEN2_KRAKEN2 { --threads $task.cpus \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ - $unclassified_command \\ - $classified_command \\ - $readclassification_command \\ + $unclassified_option \\ + $classified_option \\ + $readclassification_option \\ $paired \\ $args \\ $reads diff --git a/modules/nf-core/modules/krakentools/kreport2krona/main.nf b/modules/nf-core/modules/krakentools/kreport2krona/main.nf index 3bf46ee..1fcb1e4 100644 --- a/modules/nf-core/modules/krakentools/kreport2krona/main.nf +++ b/modules/nf-core/modules/krakentools/kreport2krona/main.nf @@ -1,9 +1,8 @@ -def VERSION = '1.2' // Version information not provided by tool on CLI - process KRAKENTOOLS_KREPORT2KRONA { tag "$meta.id" label 'process_low' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::krakentools=1.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': @@ -22,6 +21,7 @@ process KRAKENTOOLS_KREPORT2KRONA { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ kreport2krona.py \\ -r ${kreport} \\ diff --git a/modules/nf-core/modules/metaphlan3/main.nf b/modules/nf-core/modules/metaphlan3/metaphlan3/main.nf similarity index 94% rename from modules/nf-core/modules/metaphlan3/main.nf rename to modules/nf-core/modules/metaphlan3/metaphlan3/main.nf index bff0eb9..e56f102 100644 --- a/modules/nf-core/modules/metaphlan3/main.nf +++ b/modules/nf-core/modules/metaphlan3/metaphlan3/main.nf @@ -28,15 +28,18 @@ process METAPHLAN3 { def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" """ + BT2_DB=`find -L "${metaphlan_db}" -name "*rev.1.bt2" -exec dirname {} \\;` + metaphlan \\ --nproc $task.cpus \\ $input_type \\ $input_data \\ $args \\ $bowtie2_out \\ - --bowtie2db ${metaphlan_db} \\ + --bowtie2db \$BT2_DB \\ --biom ${prefix}.biom \\ --output_file ${prefix}_profile.txt + cat <<-END_VERSIONS > versions.yml "${task.process}": metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}') diff --git a/modules/nf-core/modules/metaphlan3/meta.yml b/modules/nf-core/modules/metaphlan3/metaphlan3/meta.yml similarity index 82% rename from modules/nf-core/modules/metaphlan3/meta.yml rename to modules/nf-core/modules/metaphlan3/metaphlan3/meta.yml index d10a27d..093de5b 100644 --- a/modules/nf-core/modules/metaphlan3/meta.yml +++ b/modules/nf-core/modules/metaphlan3/metaphlan3/meta.yml @@ -24,6 +24,12 @@ input: type: file description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out) pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}" + - metaphlan_db: + type: file + description: | + Directory containing pre-downloaded and uncompressed MetaPhlAn3 database downloaded from: http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/. + Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_v31_CHOCOPhlAn_201901') in your module.conf ext.args for METAPHLAN3_METAPHLAN3! + pattern: "*/" output: - meta: diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index 1264aac..c8e6ace 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -1,13 +1,15 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: - path multiqc_files + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -20,8 +22,25 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' """ - multiqc -f $args . + multiqc \\ + --force \\ + $config \\ + $args \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml index 6fa891e..a1029f3 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/modules/multiqc/meta.yml @@ -12,11 +12,21 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +48,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf index 55194e8..59ded5c 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_VIEW { tag "$meta.id" - label 'process_medium' + label 'process_low' conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index dc43fb7..4128652 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -1,11 +1,11 @@ process UNTAR { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::tar=1.32" : null) + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(archive) @@ -21,12 +21,29 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ $archive \\ - $args2 \\ + $args2 + + mv output ${untar} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + untar = archive.toString() - '.tar.gz' + """ + touch $untar cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c4c5195..37b4a2c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -7,7 +7,7 @@ include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/mo include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/centrifuge/kreport/main' -include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' +include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/metaphlan3/main' include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main' include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main' diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index 505f989..c5f15c7 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -22,7 +22,7 @@ workflow SHORTREAD_HOSTREMOVAL { ch_bowtie2_index = index.first() } - BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true ) + BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true, false ) ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index a47b803..223ba15 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -42,8 +42,7 @@ if (params.run_malt && params.run_krona && !params.krona_taxonomy_directory) log ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = params.multiqc_config ? file( params.multiqc_config, checkIfExists: true ) : file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -93,7 +92,7 @@ def multiqc_report = [] workflow TAXPROFILER { ch_versions = Channel.empty() - ch_taxprofiler_logo = Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") + ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -241,14 +240,11 @@ workflow TAXPROFILER { ch_workflow_summary = Channel.value(workflow_summary) ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([])) - if (params.perform_shortread_qc) { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) } @@ -273,7 +269,9 @@ workflow TAXPROFILER { // TODO create multiQC module for metaphlan MULTIQC ( - ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config, + ch_multiqc_logo ) multiqc_report = MULTIQC.out.report.toList() ch_versions = ch_versions.mix(MULTIQC.out.versions)