From 21a76c244943fb96dbe863c2f08b58335d0b00de Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 3 Nov 2022 14:56:26 +0100 Subject: [PATCH] Fix cardinality issue --- nextflow.config | 1 + nextflow_schema.json | 62 ++++++++------------------------- subworkflows/local/profiling.nf | 4 +-- 3 files changed, 18 insertions(+), 49 deletions(-) diff --git a/nextflow.config b/nextflow.config index 6223db0..262aa2d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -120,6 +120,7 @@ params { //krakenuniq run_krakenuniq = false + krakenuniq_ram_run_size = '16G' krakenuniq_save_reads = false // added directly to module in profiling.nf krakenuniq_save_readclassifications = false // added directly to module in profiling.nf diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f52764..c40a0de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -140,11 +133,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -178,10 +167,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -355,15 +341,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -382,14 +360,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -428,6 +399,13 @@ "description": "Turn on saving of KrakenUniq-aligned reads", "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`" }, + "krakenuniq_ram_run_size": { + "type": "string", + "default": "16G", + "description": "Specify how large to chunk database when loading into memory for KrakenUniq", + "fa_icon": "fas fa-database", + "help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preloaded` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, and is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload\n\n> \n\n" + }, "krakenuniq_save_readclassifications": { "type": "boolean", "fa_icon": "fas fa-save", @@ -612,14 +590,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -776,10 +747,7 @@ "preprocessing_qc_tool": { "type": "string", "default": "fastqc", - "enum": [ - "fastqc", - "falco" - ], + "enum": ["fastqc", "falco"], "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", "description": "Specify the tool used for quality control of raw sequencing reads" } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 73e0b73..849583e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -274,8 +274,8 @@ workflow PROFILING { reads: [ it[0] + it[2], it[1] ] db: it[3] } - - KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_save_reads, params.krakenuniq_save_readclassification ) + // Hardcode to _always_ produce the report file (which is our basic otput, and goes into raw_profiles) + KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassification ) ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report ) ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )