From 36e101a78b2f87b1aef1a5b05f65b6b730f5ad9d Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Tue, 25 Oct 2022 09:26:33 +0000 Subject: [PATCH 01/36] Install krakenuniq module --- .../krakenuniq/preloadedkrakenuniq/main.nf | 118 ++++++++++++++++++ .../krakenuniq/preloadedkrakenuniq/meta.yml | 77 ++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf create mode 100644 modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf new file mode 100644 index 0000000..a355c13 --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -0,0 +1,118 @@ +process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0': + 'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }" + + input: + tuple val(meta), path(fastqs) + path db + val ram_chunk_size + val save_output_fastqs + val report_file + val save_output + + output: + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classified.txt') , optional:true, emit: classified_assignment + tuple val(meta), path('*report.txt') , emit: report + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args ?: '' + + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? 
'"\$PREFIX".classified.fastq' : '"\$PREFIX".classified#.fastq' + def unclassified = meta.single_end ? '"\$PREFIX".unclassified.fastq' : '"\$PREFIX".unclassified#.fastq' + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def output_option = save_output ? '--output "\$PREFIX".krakenuniq.classified.txt' : "" + def report = report_file ? '--report-file "\$PREFIX".krakenuniq.report.txt' : "" + def compress_reads_command = save_output_fastqs ? "gzip --no-name *.fastq" : "" + + """ + krakenuniq \\ + $args \\ + --db $db \\ + --preload $ram_chunk_size \\ + --threads $task.cpus + + for fastq in ${fastqs.join(' ')}; do \\ + PREFIX=\$(echo \$fastq) + krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + $paired \\ + $args2 \\ + \$fastq + done + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args ?: '' + + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? '"\$PREFIX".classified.fastq' : '"\$PREFIX".classified#.fastq' + def unclassified = meta.single_end ? '"\$PREFIX".unclassified.fastq' : '"\$PREFIX".unclassified#.fastq' + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def output_option = save_output ? '--output "\$PREFIX".krakenuniq.classified.txt' : "" + def report = report_file ? '--report-file "\$PREFIX".krakenuniq.report.txt' : "" + def compress_reads_command = save_output_fastqs ? 
"echo 'gzip --no-name *.fastq'" : "" + """ + echo "krakenuniq \\ + $args \\ + --db $db \\ + --preload $ram_chunk_size \\ + --threads $task.cpus" + + for fastq in ${fastqs.join(' ')}; do \\ + PREFIX=\$(echo \$fastq) + echo "krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + $paired \\ + $args2 \\ + \$fastq" + + touch "\$PREFIX".classified.fastq.gz + touch "\$PREFIX".krakenuniq.classified.txt + touch "\$PREFIX".krakenuniq.report.txt + touch "\$PREFIX".unclassified.fastq.gz + done + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml new file mode 100644 index 0000000..de788af --- /dev/null +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -0,0 +1,77 @@ +name: "krakenuniq_preloadedkrakenuniq" +description: Classifies metagenomic sequence data using unique k-mer counts +keywords: + - classify + - metagenomics + - kmers + - fastq + - db +tools: + - "krakenuniq": + description: "Metagenomics classifier with unique k-mer counting for more specific results" + homepage: https://github.com/fbreitwieser/krakenuniq + documentation: https://github.com/fbreitwieser/krakenuniq + doi: 10.1186/s13059-018-1568-0 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastqs: + type: file + description: List of input FastQ files + - db: + type: directory + description: KrakenUniq database + - ram_chunk_size: + type: val + description: Maximum amount of RAM to use for each chunk of the database that is loaded at any one time + pattern: "*GB" + - save_output_fastqs: + type: boolean + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - save_reads_assignment: + type: boolean + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - classified_reads_fastq: + type: file + description: | + Reads classified as belonging to any of the taxa + on the KrakenUniq database. + pattern: "*{fastq.gz}" + - unclassified_reads_fastq: + type: file + description: | + Reads not classified to any of the taxa + on the KrakenUniq database. + pattern: "*{fastq.gz}" + - classified_assignment: + type: file + description: | + KrakenUniq output file indicating the taxonomic assignment of + each input read + - report: + type: file + description: | + KrakenUniq report containing stats about classified + and not classified reads. 
+ pattern: "*.{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjamy" From 86ca71cfd4399147d7dbfac927f96c3867e3f003 Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Tue, 25 Oct 2022 09:45:05 +0000 Subject: [PATCH 02/36] include krakenuniq in profiling subworkflow --- modules.json | 4 ++++ subworkflows/local/profiling.nf | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/modules.json b/modules.json index 10d6c74..2331ef8 100644 --- a/modules.json +++ b/modules.json @@ -89,6 +89,10 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "krakenuniq/preloadedkrakenuniq": { + "branch": "master", + "git_sha": "13b9d4854593c03e5e25e8a8f47462542c2c0dd4" + }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 11c4a72..6432829 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -11,6 +11,7 @@ include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/me include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main' include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main' +include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main' workflow PROFILING { take: @@ -45,6 +46,7 @@ workflow PROFILING { kaiju: it[2]['tool'] == 'kaiju' diamond: it[2]['tool'] == 'diamond' motus: it[2]['tool'] == 'motus' + krakenuniq: it[2]['tool'] == 'krakenuniq' unknown: true } @@ -228,6 +230,22 @@ workflow PROFILING { ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log ) } + if ( params.run_krakenuniq ) { + ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq + .multiMap { + it -> + reads: [ it[0] + it[2], it[1] ] + db: it[3] + } + + 
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_save_reads, params.krakenuniq_save_readclassification ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) + ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) + + } + emit: classifications = ch_raw_classifications profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom From f4717be9abcdb71771101936b9b1881142478feb Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Tue, 25 Oct 2022 09:50:44 +0000 Subject: [PATCH 03/36] add krakenuniq to modules.config --- conf/modules.config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index d2a0051..252a8fe 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -295,6 +295,16 @@ process { ] } + withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + publishDir = [ + path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt,report,fastq.gz}' + ] + } + withName: KRONA_CLEANUP { ext.prefix = params.perform_runmerging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ From febcbfc05719d28e0c43b385322517f194c335c5 Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Mon, 31 Oct 2022 16:07:53 +0000 Subject: [PATCH 04/36] add missing parameters to nextflow.config --- conf/modules.config | 3 ++- nextflow.config | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 8e36c5a..eaa5157 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -335,7 +335,8 @@ process { withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ { ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + // one run with multiple samples, so fix ID to just db name to ensure clean log name + ext.prefix = { "${meta.db_name}" } publishDir = [ path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }, mode: params.publish_dir_mode, diff --git a/nextflow.config b/nextflow.config index b369f31..6223db0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -118,6 +118,11 @@ params { kraken2_save_readclassification = false // added directly to module in profiling.nf kraken2_save_minimizers = false + //krakenuniq + run_krakenuniq = false + krakenuniq_save_reads = false // added directly to module in profiling.nf + krakenuniq_save_readclassifications = false // added directly to module in profiling.nf + // Bracken run_bracken = false From 851016e497c01151c8a4bfb2ad93efc6f032f508 Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Mon, 31 Oct 2022 16:17:40 +0000 Subject: [PATCH 05/36] Update nextflow_scheme.json --- nextflow_schema.json | 72 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 21e7e96..c7f4825 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", 
"fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "databases", "outdir"], + "required": [ + "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -133,7 +140,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +178,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +355,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. 
For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +382,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -388,6 +417,23 @@ "fa_icon": "fas fa-save", "help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command." }, + "run_krakenuniq": { + "type": "string", + "fa_icon": "fas fa-toggle-on", + "description": "Turn on profiling with KrakenUniq. 
Requires database to be present CSV file passed to --databases" + }, + "krakenuniq_save_reads": { + "type": "string", + "fa_icon": "fas fa-save", + "description": "Turn on saving of KrakenUniq-aligned reads", + "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" + }, + "krakenuniq_save_readclassifications": { + "type": "string", + "fa_icon": "fas fa-save", + "description": "Turn on saving of KrakenUniq per-read taxonomic assignment file", + "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`" + }, "run_bracken": { "type": "boolean", "description": "Post-process kraken2 reports with Bracken.", @@ -566,7 +612,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -723,7 +776,10 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": ["fastqc", "falco"], + "enum": [ + "fastqc", + "falco" + ], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. 
We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads" } From aecf308840acc8800c766e2bb56896e56dd5f171 Mon Sep 17 00:00:00 2001 From: Mahwash Jamy Date: Mon, 31 Oct 2022 16:41:35 +0000 Subject: [PATCH 06/36] Update documentation for krakenuniq --- CITATIONS.md | 4 ++++ docs/usage.md | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 0415bf7..daf9022 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,6 +36,10 @@ > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. +- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0) + + > Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0 + - [Bracken](https://doi.org/10.7717/peerj-cs.104) > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104 diff --git a/docs/usage.md b/docs/usage.md index 0401540..11e077f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -74,7 +74,7 @@ The pipeline takes the locations and specific profiling parameters of the tool o > ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files. -An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each. 
This is because specifying `bracken` implies first running `kraken2` on the same database. +An example database sheet can look as follows, where 5 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database. ```console tool,db_name,db_params,db_path @@ -82,6 +82,7 @@ malt,malt85,-id 85,///malt/testdb-malt/ malt,malt95,-id 90,///malt/testdb-malt.tar.gz bracken,db1,,///bracken/testdb-bracken.tar.gz kraken2,db2,--quick,///kraken2/testdb-kraken2.tar.gz +krakenuniq,db3,,///krakenuniq/testdb-krakenuniq.tar.gz centrifuge,db1,,///centrifuge/minigut_cf.tar.gz metaphlan3,db1,,///metaphlan3/metaphlan_database/ motus,db_mOTU,,///motus/motus_database/ @@ -125,6 +126,20 @@ Expected (uncompressed) database files for each tool are as follows: - `database100mers.kraken` - `database150mers.kmer_distrib` - `database150mers.kraken` +- **KrakenUniq** output of `krakenuniq-build` command(s) A directory containing: + - `opts.k2d` + - `hash.k2d` + - `taxo.k2d` + - `database.idx` + - `database.kdb.counts` + - `database50mers.kmer_distrib` + - `database75mers.kmer_distrib` + - `database100mers.kmer_distrib` + - `database150mers.kmer_distrib` + - `database200mers.kmer_distrib` + - `database300mers.kmer_distrib` + - `seqid2taxid.map` + - `taxDB` - **Centrifuge** output of `centrifuge-build`. A directory containing: - `..cf` - `..cf` From 6459d8920fd53a8c03680f7b4b32bf9943f00123 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 14:05:43 +0100 Subject: [PATCH 07/36] Linting! 
--- docs/usage.md | 12 ++---------- nextflow_schema.json | 6 +++--- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 11e077f..a87102c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,14 +131,6 @@ Expected (uncompressed) database files for each tool are as follows: - `hash.k2d` - `taxo.k2d` - `database.idx` - - `database.kdb.counts` - - `database50mers.kmer_distrib` - - `database75mers.kmer_distrib` - - `database100mers.kmer_distrib` - - `database150mers.kmer_distrib` - - `database200mers.kmer_distrib` - - `database300mers.kmer_distrib` - - `seqid2taxid.map` - `taxDB` - **Centrifuge** output of `centrifuge-build`. A directory containing: - `..cf` @@ -192,7 +184,7 @@ work # Directory containing the nextflow working files ### Sequencing quality control -nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco] as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/). +nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/). ### Preprocessing Steps @@ -457,7 +449,7 @@ NXF_OPTS='-Xms1g -Xmx4g' ## Troubleshooting and FAQs -### I get a warning during centrifuge_kreport process with exit status 255. +### I get a warning during centrifuge_kreport process with exit status 255 When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty. diff --git a/nextflow_schema.json b/nextflow_schema.json index c7f4825..7f52764 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -418,18 +418,18 @@ "help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command." 
}, "run_krakenuniq": { - "type": "string", + "type": "boolean", "fa_icon": "fas fa-toggle-on", "description": "Turn on profiling with KrakenUniq. Requires database to be present CSV file passed to --databases" }, "krakenuniq_save_reads": { - "type": "string", + "type": "boolean", "fa_icon": "fas fa-save", "description": "Turn on saving of KrakenUniq-aligned reads", "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" }, "krakenuniq_save_readclassifications": { - "type": "string", + "type": "boolean", "fa_icon": "fas fa-save", "description": "Turn on saving of KrakenUniq per-read taxonomic assignment file", "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`" From 0c445759e4a23f7f2d6e0fc172e0a9c075b742fe Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 3 Nov 2022 13:42:39 +0000 Subject: [PATCH 08/36] [automated] Fix linting with Prettier --- nextflow_schema.json | 55 +++++++------------------------------------- 1 file changed, 8 insertions(+), 47 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f52764..bb22fac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": 
"Specify which tool to use for short-read QC" }, @@ -140,11 +133,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -178,10 +167,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -355,15 +341,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -382,14 +360,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. 
`species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -612,14 +583,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -776,10 +740,7 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": [ - "fastqc", - "falco" - ], + "enum": ["fastqc", "falco"], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. 
We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads" } From 21a76c244943fb96dbe863c2f08b58335d0b00de Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 14:56:26 +0100 Subject: [PATCH 09/36] Fix cardinality issue --- nextflow.config | 1 + nextflow_schema.json | 62 ++++++++------------------------- subworkflows/local/profiling.nf | 4 +-- 3 files changed, 18 insertions(+), 49 deletions(-) diff --git a/nextflow.config b/nextflow.config index 6223db0..262aa2d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -120,6 +120,7 @@ params { //krakenuniq run_krakenuniq = false + krakenuniq_ram_run_size = '16G' krakenuniq_save_reads = false // added directly to module in profiling.nf krakenuniq_save_readclassifications = false // added directly to module in profiling.nf diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f52764..c40a0de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -140,11 +133,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -178,10 +167,7 @@ 
"shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -355,15 +341,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -382,14 +360,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. 
`superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -428,6 +399,13 @@ "description": "Turn on saving of KrakenUniq-aligned reads", "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" }, + "krakenuniq_ram_run_size": { + "type": "string", + "default": "16G", + "description": "Specify how large to chunk database when loading into memory for KrakenUniq", + "fa_icon": "fas fa-database", + "help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preload` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, which is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload\n\n> \n\n" + }, "krakenuniq_save_readclassifications": { "type": "boolean", "fa_icon": "fas fa-save", @@ -612,14 +590,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -776,10 +747,7 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": [ - "fastqc", - "falco" - ], + "enum": ["fastqc", "falco"], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads" } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 73e0b73..849583e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -274,8 +274,8 @@ workflow PROFILING { reads: [ it[0] + it[2], it[1] ] db: it[3] } - - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_save_reads, params.krakenuniq_save_readclassification ) + // Hardcode to _always_ produce the report file (which is our basic otput, and goes into raw_profiles) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassification ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From c5e5696f84ca580a0a8c070f50223375d68d6386 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 14:58:03 +0100 Subject: 
[PATCH 10/36] Fix parameter name --- nextflow.config | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 262aa2d..ce48db9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -120,7 +120,7 @@ params { //krakenuniq run_krakenuniq = false - krakenuniq_ram_run_size = '16G' + krakenuniq_ram_chunk_size = '16G' krakenuniq_save_reads = false // added directly to module in profiling.nf krakenuniq_save_readclassifications = false // added directly to module in profiling.nf diff --git a/nextflow_schema.json b/nextflow_schema.json index c40a0de..69bd4f0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -399,7 +399,7 @@ "description": "Turn on saving of KrakenUniq-aligned reads", "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" }, - "krakenuniq_ram_run_size": { + "krakenuniq_ram_chunk_size": { "type": "string", "default": "16G", "description": "Specify how large to chunk database when loading into memory for KrakenUniq", From 93f38b14a83dc98e4438724b7a5d8ed2bcf7e944 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 15:01:00 +0100 Subject: [PATCH 11/36] Save report --- subworkflows/local/profiling.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 849583e..6930373 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -275,7 +275,8 @@ workflow PROFILING { db: it[3] } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into raw_profiles) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, 
params.krakenuniq_save_readclassification ) + save_report = true + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, save_report, params.krakenuniq_save_readclassification ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 461482d22a2aa36e6f82f1b2ccc4c2eb12166c34 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 15:02:22 +0100 Subject: [PATCH 12/36] Try again wit hthe corret param --- subworkflows/local/profiling.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 6930373..2cdf1d1 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -274,9 +274,8 @@ workflow PROFILING { reads: [ it[0] + it[2], it[1] ] db: it[3] } - // Hardcode to _always_ produce the report file (which is our basic otput, and goes into raw_profiles) - save_report = true - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, save_report, params.krakenuniq_save_readclassification ) + // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( 
KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 0f298dca4351cd4e57953ed025084c60db2e80f4 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:13:08 +0100 Subject: [PATCH 13/36] group krakenuniq reads by pairs or single end --- subworkflows/local/profiling.nf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 2cdf1d1..8c09af6 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -269,6 +269,16 @@ workflow PROFILING { if ( params.run_krakenuniq ) { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq + .map { + meta, reads, db_meta, db -> + def meta_new = [:] + meta_new['tool'] = meta['tool'] + meta_new['single_end'] = meta['single_end'] + + [meta_new, reads, db_meta, db] + } + .groupTuple(by: [0,2,3]) + .dump(tag: "krakenuniq_premultimap") .multiMap { it -> reads: [ it[0] + it[2], it[1] ] From db334c51852d2b9a8b333df7f018405633500195 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:17:08 +0100 Subject: [PATCH 14/36] Fix missing info --- subworkflows/local/profiling.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 8c09af6..51a38e3 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -272,7 +272,6 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def meta_new = [:] - meta_new['tool'] = meta['tool'] meta_new['single_end'] = meta['single_end'] [meta_new, reads, db_meta, db] From b8937748d311d863aafeed02939388c3a2853c05 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:18:52 +0100 Subject: [PATCH 15/36] Fix meta.id isssue --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 51a38e3..80e0850 100644 --- a/subworkflows/local/profiling.nf +++ 
b/subworkflows/local/profiling.nf @@ -272,6 +272,7 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def meta_new = [:] + meta['id'] = db['db_name'] meta_new['single_end'] = meta['single_end'] [meta_new, reads, db_meta, db] From 52fc00802bbde66722232b44076d5fdda7342d92 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:19:56 +0100 Subject: [PATCH 16/36] Correct meta name for db meta --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 80e0850..4fc490d 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -272,7 +272,7 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def meta_new = [:] - meta['id'] = db['db_name'] + meta['id'] = db_meta['db_name'] meta_new['single_end'] = meta['single_end'] [meta_new, reads, db_meta, db] From 5caebcc6ab38d2c6d557da77317eaba9965a5f6d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:21:59 +0100 Subject: [PATCH 17/36] Append to correct db --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 4fc490d..e43a915 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -272,7 +272,7 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def meta_new = [:] - meta['id'] = db_meta['db_name'] + meta_new['id'] = db_meta['db_name'] meta_new['single_end'] = meta['single_end'] [meta_new, reads, db_meta, db] From 557f2a4d966ffd93137780f002a66c714d412865 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:26:46 +0100 Subject: [PATCH 18/36] Extra dump --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index e43a915..f904cc1 100644 --- 
a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -285,7 +285,7 @@ workflow PROFILING { db: it[3] } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 3102e4005257fe7ff17c24b319774d0805431897 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:46:26 +0100 Subject: [PATCH 19/36] dump teh db --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index f904cc1..a46a773 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -285,7 +285,7 @@ workflow PROFILING { db: it[3] } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, 
params.krakenuniq_save_readclassifications ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment ) From 21759807f76bed0e34730c5b5cb90eed119ff9c2 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 12:48:54 +0100 Subject: [PATCH 20/36] Does flatten work? --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index a46a773..291d476 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -281,7 +281,7 @@ workflow PROFILING { .dump(tag: "krakenuniq_premultimap") .multiMap { it -> - reads: [ it[0] + it[2], it[1] ] + reads: [ it[0] + it[2], it[1].flatten() ] db: it[3] } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) From ebfc84e235b6e1a79cdcfb6787652773393ed58a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 13:26:54 +0100 Subject: [PATCH 21/36] Does the grouping work if we have meta? --- modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 1 - subworkflows/local/profiling.nf | 1 - 2 files changed, 2 deletions(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index a355c13..85d35bd 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -1,5 +1,4 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { - tag "$meta.id" label 'process_high' conda (params.enable_conda ? 
"bioconda::krakenuniq=1.0.0" : null) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 291d476..6e45648 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -272,7 +272,6 @@ workflow PROFILING { .map { meta, reads, db_meta, db -> def meta_new = [:] - meta_new['id'] = db_meta['db_name'] meta_new['single_end'] = meta['single_end'] [meta_new, reads, db_meta, db] From e8de006ed65bfddc80d56e977880e40c3f70600c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 13:27:49 +0100 Subject: [PATCH 22/36] With a join --- modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 85d35bd..b0885c0 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -87,7 +87,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload $ram_chunk_size \\ --threads $task.cpus" - for fastq in ${fastqs.join(' ')}; do \\ + for fastq in "${fastqs.join('\" \"')}"; do \\ PREFIX=\$(echo \$fastq) echo "krakenuniq \\ --db $db \\ From cacd7d3128f1833cd19ec5627078e0d3570d219c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 13:33:13 +0100 Subject: [PATCH 23/36] Not just in stub --- modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index b0885c0..362d3a9 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -45,7 +45,7 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload $ram_chunk_size \\ --threads $task.cpus - for fastq in ${fastqs.join(' ')}; do \\ + for fastq in "${fastqs.join('\" 
\"')}"; do \\ PREFIX=\$(echo \$fastq) krakenuniq \\ --db $db \\ From 06659af777ae59960f161222b4a01d85be64f24c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 13:36:40 +0100 Subject: [PATCH 24/36] Remove flatten --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 6e45648..18b40b4 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -280,7 +280,7 @@ workflow PROFILING { .dump(tag: "krakenuniq_premultimap") .multiMap { it -> - reads: [ it[0] + it[2], it[1].flatten() ] + reads: [ it[0] + it[2], it[1] ] db: it[3] } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) From a383c9ae4099d0b4bfed75d77e830faabe26e04a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 22 Nov 2022 13:39:21 +0100 Subject: [PATCH 25/36] Remove the join --- modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index 362d3a9..dc9b5a4 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -45,7 +45,8 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { --preload $ram_chunk_size \\ --threads $task.cpus - for fastq in "${fastqs.join('\" \"')}"; do \\ +# for fastq in "${fastqs.join('\" \"')}"; do \\ + for fastq in ${fastqs}; do \\ PREFIX=\$(echo \$fastq) krakenuniq \\ --db $db \\ From 9feb6c2db6f4d6e16d4a75ceb0e4f901fd5e5f20 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 24 Nov 2022 14:44:04 +0100 Subject: [PATCH 26/36] Update MALT --- conf/modules.config | 2 +- modules.json | 2 +- modules/nf-core/malt/run/main.nf | 8 +++----- subworkflows/local/profiling.nf | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff 
--git a/conf/modules.config b/conf/modules.config index f9d51d8..ea569b6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -281,7 +281,7 @@ process { } withName: MALT_RUN { - ext.args = { "${meta.db_params}" } + ext.args = { "${meta.db_params} -m ${params.malt_mode}" } // one run with multiple samples, so fix ID to just db name to ensure clean log name ext.prefix = { "${meta.db_name}" } publishDir = [ diff --git a/modules.json b/modules.json index 62f6539..fabd8db 100644 --- a/modules.json +++ b/modules.json @@ -107,7 +107,7 @@ }, "malt/run": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "6d9712f03ec2de8264a50ee4541a617e1e063b51" }, "megan/rma2info": { "branch": "master", diff --git a/modules/nf-core/malt/run/main.nf b/modules/nf-core/malt/run/main.nf index 2b91d90..2e75b4c 100644 --- a/modules/nf-core/malt/run/main.nf +++ b/modules/nf-core/malt/run/main.nf @@ -2,14 +2,13 @@ process MALT_RUN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::malt=0.41" : null) + conda (params.enable_conda ? "bioconda::malt=0.61" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/malt:0.41--1' : - 'quay.io/biocontainers/malt:0.41--1' }" + 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' : + 'quay.io/biocontainers/malt:0.61--hdfd78af_0' }" input: tuple val(meta), path(fastqs) - val mode path index output: @@ -38,7 +37,6 @@ process MALT_RUN { -o . 
\\ $args \\ --inFile ${fastqs.join(' ')} \\ - -m $mode \\ --index $index/ |&tee ${prefix}-malt-run.log cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b86c165..6cac3e1 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -95,7 +95,7 @@ workflow PROFILING { db: it[2] } - MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) + MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db ) ch_maltrun_for_megan = MALT_RUN.out.rma6 .transpose() From 131b39aa8b3ecfae3d18f781f7440929243ab3a6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 24 Nov 2022 15:49:06 +0100 Subject: [PATCH 27/36] Fix linting --- modules/nf-core/malt/run/meta.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/nf-core/malt/run/meta.yml b/modules/nf-core/malt/run/meta.yml index 66f2d7a..8fa1958 100644 --- a/modules/nf-core/malt/run/meta.yml +++ b/modules/nf-core/malt/run/meta.yml @@ -28,10 +28,6 @@ input: type: file description: Input FASTQ files pattern: "*.{fastq.gz,fq.gz}" - - mode: - type: string - description: Program mode - pattern: "Unknown|BlastN|BlastP|BlastX|Classifier" - index: type: directory description: Index/database directory from malt-build From a20b3e3c58a54274f908afdb231739f51eb3da4e Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 24 Nov 2022 15:51:12 +0100 Subject: [PATCH 28/36] Try to reduce MALT memory --- conf/test.config | 1 + conf/test_nopreprocessing.config | 1 + conf/test_nothing.config | 1 + conf/test_pep.config | 1 + lib/WorkflowMain.groovy | 15 ++++++++------- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 777d9bf..03b8307 100644 --- a/conf/test.config +++ b/conf/test.config @@ -51,6 +51,7 @@ params { process { withName: MALT_RUN { maxForks = 1 + ext.args = "-m ${params.malt_mode} -J-Xmx8G" } withName: MEGAN_RMA2INFO_TSV { maxForks = 1 diff --git 
a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 357f76f..c87cf6c 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -45,5 +45,6 @@ params { process { withName: MALT_RUN { maxForks = 1 + ext.args = "-m ${params.malt_mode} -J-Xmx8G" } } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index df09613..0a969a6 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -44,5 +44,6 @@ params { process { withName: MALT_RUN { maxForks = 1 + ext.args = "-m ${params.malt_mode} -J-Xmx8G" } } diff --git a/conf/test_pep.config b/conf/test_pep.config index 762ebb3..3cd6dd0 100644 --- a/conf/test_pep.config +++ b/conf/test_pep.config @@ -37,6 +37,7 @@ params { process { withName: MALT_RUN { maxForks = 1 + ext.args = "-m ${params.malt_mode} -J-Xmx8G" } withName: MEGAN_RMA2INFO { maxForks = 1 diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7883d70..02d4347 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -12,9 +12,9 @@ class WorkflowMain { // TODO nf-core: Add Zenodo DOI for pipeline after first release //"* The pipeline\n" + //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + + '* The nf-core framework\n' + + ' https://doi.org/10.1038/s41587-020-0439-x\n\n' + + '* Software dependencies\n' + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } @@ -53,15 +53,15 @@ class WorkflowMain { System.exit(0) } + // Print parameter summary log to screen + + log.info paramsSummaryLog(workflow, params, log) + // Validate workflow parameters via the JSON schema if (params.validate_params) { NfcoreSchema.validateParameters(workflow, params, log) } - // Print parameter summary log to screen - - log.info paramsSummaryLog(workflow, params, log) - // Check that a -profile or Nextflow config has been provided to run the pipeline 
NfcoreTemplate.checkConfigProvided(workflow, log) @@ -90,4 +90,5 @@ class WorkflowMain { } return null } + } From 17b1e3df1c230f1767deb7cb7f3e1bdf5db09556 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 24 Nov 2022 16:53:39 +0100 Subject: [PATCH 29/36] Update test.config --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 03b8307..b3ad7bd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -51,7 +51,7 @@ params { process { withName: MALT_RUN { maxForks = 1 - ext.args = "-m ${params.malt_mode} -J-Xmx8G" + ext.args = "-m ${params.malt_mode} -J-Xmx12G" } withName: MEGAN_RMA2INFO_TSV { maxForks = 1 From f114cc41220c9e48bc3a2f6f6c2e72e5ecade8a8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 24 Nov 2022 16:55:19 +0100 Subject: [PATCH 30/36] Apply suggestions from code review --- conf/test.config | 2 +- conf/test_nopreprocessing.config | 2 +- conf/test_nothing.config | 2 +- conf/test_pep.config | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/test.config b/conf/test.config index b3ad7bd..e79bef4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -51,7 +51,7 @@ params { process { withName: MALT_RUN { maxForks = 1 - ext.args = "-m ${params.malt_mode} -J-Xmx12G" + ext.args = { "-m ${params.malt_mode} -J-Xmx12G" } } withName: MEGAN_RMA2INFO_TSV { maxForks = 1 diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index c87cf6c..eae6144 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -45,6 +45,6 @@ params { process { withName: MALT_RUN { maxForks = 1 - ext.args = "-m ${params.malt_mode} -J-Xmx8G" + ext.args = { "-m ${params.malt_mode} -J-Xmx12G" } } } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 0a969a6..ca75380 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -44,6 +44,6 @@ params { process { withName: MALT_RUN { 
maxForks = 1 - ext.args = "-m ${params.malt_mode} -J-Xmx8G" + ext.args = { "-m ${params.malt_mode} -J-Xmx12G" } } } diff --git a/conf/test_pep.config b/conf/test_pep.config index 3cd6dd0..6ce788d 100644 --- a/conf/test_pep.config +++ b/conf/test_pep.config @@ -37,7 +37,7 @@ params { process { withName: MALT_RUN { maxForks = 1 - ext.args = "-m ${params.malt_mode} -J-Xmx8G" + ext.args = { "-m ${params.malt_mode} -J-Xmx12G" } } withName: MEGAN_RMA2INFO { maxForks = 1 From ddcf38de22011a59446e5343c7bbcae430a03529 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 29 Nov 2022 10:14:20 +0100 Subject: [PATCH 31/36] refactor: make mappings explicit --- subworkflows/local/profiling.nf | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 18b40b4..4ca2c4c 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -271,17 +271,14 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - def meta_new = [:] - meta_new['single_end'] = meta['single_end'] - - [meta_new, reads, db_meta, db] + [[single_end: meta.single_end], reads, db_meta, db] } .groupTuple(by: [0,2,3]) .dump(tag: "krakenuniq_premultimap") .multiMap { - it -> - reads: [ it[0] + it[2], it[1] ] - db: it[3] + single_meta, reads, db_meta, db -> + reads: [ single_meta + db_meta, reads.flatten() ] + db: db } // Hardcode to _always_ produce the report file (which is our basic otput, and goes into) KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications ) From af8ce2cbfd98ec04e7fd9e74ea0bb2fe58fcede3 Mon Sep 17 00:00:00 2001 From: "Moritz E. 
Beber" Date: Tue, 29 Nov 2022 10:27:35 +0100 Subject: [PATCH 32/36] chore: update krakenuniq --- modules.json | 2 +- .../krakenuniq/preloadedkrakenuniq/main.nf | 248 +++++++++++++----- .../krakenuniq/preloadedkrakenuniq/meta.yml | 7 +- 3 files changed, 182 insertions(+), 75 deletions(-) diff --git a/modules.json b/modules.json index 8956e42..404ae6d 100644 --- a/modules.json +++ b/modules.json @@ -99,7 +99,7 @@ }, "krakenuniq/preloadedkrakenuniq": { "branch": "master", - "git_sha": "13b9d4854593c03e5e25e8a8f47462542c2c0dd4" + "git_sha": "05649975c6611c6e007537a7984e186e12ae03af" }, "krona/ktimporttaxonomy": { "branch": "master", diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf index dc9b5a4..0ecacee 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf @@ -1,4 +1,5 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { + tag "$meta.id" label 'process_high' conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null) @@ -29,90 +30,195 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ { def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def paired = meta.single_end ? "" : "--paired" - def classified = meta.single_end ? '"\$PREFIX".classified.fastq' : '"\$PREFIX".classified#.fastq' - def unclassified = meta.single_end ? '"\$PREFIX".unclassified.fastq' : '"\$PREFIX".unclassified#.fastq' - def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" - def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" - def output_option = save_output ? '--output "\$PREFIX".krakenuniq.classified.txt' : "" - def report = report_file ? '--report-file "\$PREFIX".krakenuniq.report.txt' : "" - def compress_reads_command = save_output_fastqs ? 
"gzip --no-name *.fastq" : "" - - """ - krakenuniq \\ - $args \\ - --db $db \\ - --preload $ram_chunk_size \\ - --threads $task.cpus - -# for fastq in "${fastqs.join('\" \"')}"; do \\ - for fastq in ${fastqs}; do \\ - PREFIX=\$(echo \$fastq) + def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"' + def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"' + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : '' + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : '' + def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' + def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' + def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : '' + if (meta.single_end) { + """ krakenuniq \\ --db $db \\ + --preload \\ + --preload-size $ram_chunk_size \\ --threads $task.cpus \\ - $report \\ - $output_option \\ - $unclassified_option \\ - $classified_option \\ - $output_option \\ - $paired \\ - $args2 \\ - \$fastq - done + $args - $compress_reads_command + strip_suffix() { + local result=\$1 + # Strip any file extensions. 
+ echo "\${result%%.*}" + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') - END_VERSIONS - """ + printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + PREFIX="\$(strip_suffix "\${FASTQ}")" + + krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + $args2 \\ + "\${FASTQ}" + done + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ + } else { + """ + krakenuniq \\ + --db $db \\ + --preload \\ + --preload-size $ram_chunk_size \\ + --threads $task.cpus \\ + $args + + strip_suffix() { + local result + read result + # Strip any trailing dot or underscore. + result="\${result%_}" + echo "\${result%.}" + } + + printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + read -r -a FASTQ <<< "\${FASTQ}" + PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" + + krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + --paired \\ + $args2 \\ + "\${FASTQ[@]}" + done + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ + } stub: def args = task.ext.args ?: '' def args2 = task.ext.args ?: '' - def paired = meta.single_end ? "" : "--paired" - def classified = meta.single_end ? '"\$PREFIX".classified.fastq' : '"\$PREFIX".classified#.fastq' - def unclassified = meta.single_end ? '"\$PREFIX".unclassified.fastq' : '"\$PREFIX".unclassified#.fastq' - def classified_option = save_output_fastqs ? 
"--classified-out ${classified}" : "" - def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" - def output_option = save_output ? '--output "\$PREFIX".krakenuniq.classified.txt' : "" - def report = report_file ? '--report-file "\$PREFIX".krakenuniq.report.txt' : "" - def compress_reads_command = save_output_fastqs ? "echo 'gzip --no-name *.fastq'" : "" - """ - echo "krakenuniq \\ - $args \\ - --db $db \\ - --preload $ram_chunk_size \\ - --threads $task.cpus" - - for fastq in "${fastqs.join('\" \"')}"; do \\ - PREFIX=\$(echo \$fastq) - echo "krakenuniq \\ + def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"' + def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"' + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : '' + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : '' + def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : '' + def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : '' + def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : '' + if (meta.single_end) { + """ + echo krakenuniq \\ --db $db \\ + --preload \\ + --preload-size $ram_chunk_size \\ --threads $task.cpus \\ - $report \\ - $output_option \\ - $unclassified_option \\ - $classified_option \\ - $output_option \\ - $paired \\ - $args2 \\ - \$fastq" + $args - touch "\$PREFIX".classified.fastq.gz - touch "\$PREFIX".krakenuniq.classified.txt - touch "\$PREFIX".krakenuniq.report.txt - touch "\$PREFIX".unclassified.fastq.gz - done + strip_suffix() { + local result=\$1 + # Strip any file extensions. 
+ echo "\${result%%.*}" + } - $compress_reads_command + printf "%s\\n" ${fastqs} | while read FASTQ; do \\ + echo "\${FASTQ}" + PREFIX="\$(strip_suffix "\${FASTQ}")" + echo "\${PREFIX}" - cat <<-END_VERSIONS > versions.yml - "${task.process}": - krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') - END_VERSIONS - """ + echo krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + $args2 \\ + "\${FASTQ}" + + touch "\${PREFIX}.classified.fastq.gz" + touch "\${PREFIX}.krakenuniq.classified.txt" + touch "\${PREFIX}.krakenuniq.report.txt" + touch "\${PREFIX}.unclassified.fastq.gz" + done + + echo $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ + } else { + """ + echo krakenuniq \\ + --db $db \\ + --preload \\ + --preload-size $ram_chunk_size \\ + --threads $task.cpus \\ + $args + + strip_suffix() { + local result + read result + # Strip any trailing dot or underscore. 
+ result="\${result%_}" + echo "\${result%.}" + } + + printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\ + read -r -a FASTQ <<< "\${FASTQ}" + echo "\${FASTQ[@]}" + PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)" + echo "\${PREFIX}" + + echo krakenuniq \\ + --db $db \\ + --threads $task.cpus \\ + $report \\ + $output_option \\ + $unclassified_option \\ + $classified_option \\ + $output_option \\ + --paired \\ + $args2 \\ + "\${FASTQ[@]}" + + touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz" + touch "\${PREFIX}.krakenuniq.classified.txt" + touch "\${PREFIX}.krakenuniq.report.txt" + touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz" + done + + echo $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml index de788af..4ac645c 100644 --- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml +++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml @@ -51,13 +51,13 @@ output: description: | Reads classified as belonging to any of the taxa on the KrakenUniq database. - pattern: "*{fastq.gz}" + pattern: "*.fastq.gz" - unclassified_reads_fastq: type: file description: | Reads not classified to any of the taxa on the KrakenUniq database. - pattern: "*{fastq.gz}" + pattern: "*.fastq.gz" - classified_assignment: type: file description: | @@ -68,10 +68,11 @@ output: description: | KrakenUniq report containing stats about classified and not classifed reads. 
- pattern: "*.{report.txt}" + pattern: "*.report.txt" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@mjamy" + - "@Midnighter" From d7fbe55849f79e4029c746b5457c0ab2d4ba1a98 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 29 Nov 2022 14:19:01 +0100 Subject: [PATCH 33/36] Add KU to the test profiles and add id to KU processes --- conf/test.config | 1 + conf/test_motus.config | 1 + conf/test_nopreprocessing.config | 1 + conf/test_noprofiling.config | 1 + conf/test_nothing.config | 1 + subworkflows/local/profiling.nf | 2 +- 6 files changed, 6 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 777d9bf..016eb2e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -39,6 +39,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krakenuniq = true run_motus = false run_krona = true krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' diff --git a/conf/test_motus.config b/conf/test_motus.config index d167b94..2428a4c 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -38,6 +38,7 @@ params { run_metaphlan3 = false run_centrifuge = false run_diamond = false + run_krakenuniq = false run_motus = true run_profile_standardisation = true } diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 357f76f..9a51320 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -38,6 +38,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krakenuniq = true run_motus = false run_krona = true } diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index 59ed0da..3ca715b 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -39,6 +39,7 @@ params { run_metaphlan3 = false run_centrifuge = false run_diamond = false + 
run_krakenuniq = false run_motus = false } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index df09613..47976df 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -38,6 +38,7 @@ params { run_metaphlan3 = false run_centrifuge = false run_diamond = false + run_krakenuniq = false run_motus = false } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 4ca2c4c..aeee2a4 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -271,7 +271,7 @@ workflow PROFILING { ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq .map { meta, reads, db_meta, db -> - [[single_end: meta.single_end], reads, db_meta, db] + [[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db] } .groupTuple(by: [0,2,3]) .dump(tag: "krakenuniq_premultimap") From bc98f5b80f24b77d3168412e81e7e7feaf043bc0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 1 Dec 2022 10:37:14 +0100 Subject: [PATCH 34/36] Separate KrakenUniq test run --- .github/workflows/ci.yml | 33 +++++++++++++++++ conf/test_krakenuniq.config | 72 +++++++++++++++++++++++++++++++++++++ conf/test_motus.config | 4 +++ nextflow.config | 1 + 4 files changed, 110 insertions(+) create mode 100644 conf/test_krakenuniq.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2a1662..77440b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,3 +101,36 @@ jobs: with: command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv attempt_limit: 3 + + krakenuniq: + name: Test mOTUs with workflow parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "21.10.3" + - "latest-everything" + + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install 
Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Show current locale + run: locale + + - name: Set UTF-8 enabled locale + run: | + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + + - name: Run pipeline with test data + uses: Wandalen/wretry.action@v1.0.11 + with: + command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results + attempt_limit: 3 diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config new file mode 100644 index 0000000..c6e2548 --- /dev/null +++ b/conf/test_krakenuniq.config @@ -0,0 +1,72 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/taxprofiler -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +// +// Separate test as KrakenUniq database can sometimes be too big for GHA +// + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test to check mOTUs function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv' + perform_shortread_qc = true + perform_longread_qc = true + shortread_qc_mergepairs = true + perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true + perform_longread_hostremoval = true + perform_runmerging = true + hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' + run_kaiju = false` + run_kraken2 = false` + run_bracken = false + run_malt = false + run_metaphlan3 = false + run_centrifuge = false + run_diamond = false + run_krakenuniq = true + run_motus = false + run_krona = true + krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' + malt_save_reads = true + kraken2_save_reads = true + centrifuge_save_reads = true + diamond_save_reads = true +} + +process { + withName: MALT_RUN { + maxForks = 1 + } + withName: MEGAN_RMA2INFO_TSV { + maxForks = 1 + } + withName: MEGAN_RMA2INFO_KRONA { + maxForks = 1 + } + withName: 'EIDO_VALIDATE' { + ext.args = '--st-index sample' + } + withName: 'EIDO_CONVERT' { + ext.args = '--st-index sample' + } +} diff --git a/conf/test_motus.config b/conf/test_motus.config index 2428a4c..a077dad 100644 --- a/conf/test_motus.config +++ b/conf/test_motus.config @@ -10,6 +10,10 @@ ---------------------------------------------------------------------------------------- */ +// +// Separate test as mOTUs database download can be flaky +// + params { config_profile_name = 'mOTUs Test profile' config_profile_description = 'Minimal test to check mOTUs function' diff --git a/nextflow.config b/nextflow.config index 9ad8def..721a21c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -242,6 +242,7 @@ profiles { test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' } test_nothing { includeConfig 'conf/test_nothing.config' } test_motus { includeConfig 'conf/test_motus.config' } + test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' } test_pep { includeConfig 'conf/test_pep.config' } } From ef1d2da17917abd883d8937197fc92bb63ab372c Mon Sep 17 00:00:00 2001 
From: "James A. Fellows Yates" Date: Thu, 1 Dec 2022 12:53:34 +0100 Subject: [PATCH 35/36] Apply suggestions from code review Co-authored-by: Moritz E. Beber --- .github/workflows/ci.yml | 2 +- conf/test_krakenuniq.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77440b7..d179c6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -103,7 +103,7 @@ jobs: attempt_limit: 3 krakenuniq: - name: Test mOTUs with workflow parameters + name: Test KrakenUniq with workflow parameters if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }} runs-on: ubuntu-latest strategy: diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index c6e2548..c504ec9 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -16,7 +16,7 @@ params { config_profile_name = 'Test profile' - config_profile_description = 'Minimal test to check mOTUs function' + config_profile_description = 'Minimal test to check KrakenUniq function' // Limit resources so that this can run on GitHub Actions max_cpus = 2 From f960c14caf6cf9625f336fbaecad7acfee806d37 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 1 Dec 2022 13:46:57 +0100 Subject: [PATCH 36/36] Fix typos --- conf/test_krakenuniq.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config index c504ec9..67b559e 100644 --- a/conf/test_krakenuniq.config +++ b/conf/test_krakenuniq.config @@ -36,8 +36,8 @@ params { perform_longread_hostremoval = true perform_runmerging = true hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' - run_kaiju = false` - run_kraken2 = false` + run_kaiju = false + run_kraken2 = false run_bracken = false run_malt = false run_metaphlan3 = false