From 76a662ecc5e6a63b569b887df52ac41d1946cb7d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 12:17:19 +0000 Subject: [PATCH 1/7] Fix adapterremoval adapter conflict and add optional adapter list file --- conf/modules.config | 8 +-- modules.json | 2 +- modules/nf-core/adapterremoval/main.nf | 4 +- nextflow.config | 3 +- nextflow_schema.json | 61 +++++++++++++++---- .../local/shortread_adapterremoval.nf | 5 +- subworkflows/local/shortread_preprocessing.nf | 1 + workflows/taxprofiler.nf | 9 ++- 8 files changed, 69 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b1d7b87..9a351cc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -100,8 +100,7 @@ process { withName: ADAPTERREMOVAL_SINGLE { ext.args = [ // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() @@ -119,9 +118,8 @@ process { // collapsing options params.shortread_qc_mergepairs ? "--collapse" : "", // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level + params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() diff --git a/modules.json b/modules.json index aa77c4e..ab32eec 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "adapterremoval": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659" }, "bbmap/bbduk": { "branch": "master", diff --git a/modules/nf-core/adapterremoval/main.nf b/modules/nf-core/adapterremoval/main.nf index 0e17c05..643c141 100644 --- a/modules/nf-core/adapterremoval/main.nf +++ b/modules/nf-core/adapterremoval/main.nf @@ -34,7 +34,7 @@ process ADAPTERREMOVAL { AdapterRemoval \\ --file1 $reads \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads ${task.cpus} \\ --seed 42 \\ @@ -61,7 +61,7 @@ process ADAPTERREMOVAL { --file1 ${reads[0]} \\ --file2 ${reads[1]} \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads $task.cpus \\ --seed 42 \\ diff --git a/nextflow.config b/nextflow.config index b69948e..9bf7f83 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,9 +59,9 @@ params { // Databases databases = null + // FASTQ preprocessing preprocessing_qc_tool = 'fastqc' - // FASTQ preprocessing perform_shortread_qc = false shortread_qc_tool = 'fastp' shortread_qc_skipadaptertrim = false @@ -69,6 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null + shortread_qc_adapterlist = null // TODO Add for FASTP shortread_qc_minlength = 15 perform_longread_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 4479f54..59131b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir", "databases"], + "required": [ + "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -104,6 +111,12 @@ "description": "Specify adapter 2 nucleotide sequence", "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" }, + "shortread_qc_adapterlist": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-list-ul", + "help_text": "This affects AdapterRemoval. It will replace default adapters or whatever is supplied in `--shortread_qc_adapter{1,2}`.\n\nThis allows you to mix libraries built with different adapters into one run.\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--adapter-list`." + }, "shortread_qc_mergepairs": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -133,7 +146,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +184,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +361,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +388,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -555,7 +590,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -711,10 +753,7 @@ "properties": { "preprocessing_qc_tool": { "type": "string", - "default": "fastqc", - "enum": ["fastqc", "falco"], - "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", - "description": "Specify the tool used for quality control of raw sequencing reads" + "default": "fastqc" } } } diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a5a43fe..6567007 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { take: reads // [[meta], [reads]] + adapterlist // file main: ch_versions = Channel.empty() @@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL { paired: !it[0].single_end } - ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) - ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist ) + ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist ) /* * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f2cd738..6e14423 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -11,6 +11,7 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main' workflow SHORTREAD_PREPROCESSING { take: reads // [ [ meta ], [ reads ] ] + adapterlist // file main: ch_versions = Channel.empty() diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1ac519c..e6ccc16 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference, - params.shortread_hostremoval_index, params.multiqc_config + params.shortread_hostremoval_index, params.multiqc_config, + params.shortread_qc_adapterlist ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -102,6 +103,7 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") + ch_adapterlist_for_shortreadqc = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -131,8 +133,11 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ + + ch_adapterlist_for_shortreadqc + if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, ch_adapterlist_for_shortreadqc ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq From 733d3dc8c57078a7c2032ef028429aefc552e3de Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 14:19:28 +0200 Subject: [PATCH 2/7] Apply suggestions from code review --- nextflow_schema.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 59131b6..5c60775 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -753,7 +753,10 @@ "properties": { "preprocessing_qc_tool": { "type": "string", - "default": "fastqc" + "default": "fastqc", + "enum": ["fastqc", "falco"], + "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", + "description": "Specify the tool used for quality control of raw sequencing reads" } } } From a402ee3be200089757096207dba17cb1b4ffb5a6 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 27 Oct 2022 12:20:08 +0000 Subject: [PATCH 3/7] [automated] Fix linting with Prettier --- nextflow_schema.json | 50 +++++++------------------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5c60775..04ba938 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -146,11 +139,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -184,10 +173,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -361,15 +347,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - "paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -388,14 +366,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -590,14 +561,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 08d61065e5fe4bbae15b93690bf77331be89785e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 12:23:27 +0000 Subject: [PATCH 4/7] Add adapterlist to AR2 subworkflow --- subworkflows/local/shortread_preprocessing.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 6e14423..d381d34 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -22,7 +22,7 @@ workflow SHORTREAD_PREPROCESSING { ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_qc_tool == "adapterremoval" ) { - ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) } else { From d487a1ff2fac2e0a4bf9b3aecc41792000256b6f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 2 Nov 2022 12:39:34 +0100 Subject: [PATCH 5/7] Addd FASTP adadapter list --- conf/modules.config | 6 +++--- modules.json | 2 +- modules/nf-core/fastp/main.nf | 5 +++++ modules/nf-core/fastp/meta.yml | 4 ++++ nextflow_schema.json | 4 ++-- subworkflows/local/shortread_fastp.nf | 5 +++-- subworkflows/local/shortread_preprocessing.nf | 2 +- workflows/taxprofiler.nf | 11 +++++++---- 8 files changed, 26 insertions(+), 13 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9a351cc..f47e587 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,7 +62,7 @@ process { ext.args = [ // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", // filtering options "--length_required ${params.shortread_qc_minlength}", (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' @@ -82,8 +82,8 @@ process { params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", // filtering options "--length_required ${params.shortread_qc_minlength}", params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' diff --git a/modules.json b/modules.json index ab32eec..3527a44 100644 --- a/modules.json +++ b/modules.json @@ -55,7 +55,7 @@ }, "fastp": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e" }, "fastqc": { "branch": "master", diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 11ea4db..207258a 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -9,6 +9,7 @@ process FASTP { input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -27,6 +28,7 @@ process FASTP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. @@ -40,6 +42,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log \\ @@ -61,6 +64,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log @@ -82,6 +86,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 2368fde..6f6fad7 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -23,6 +23,10 @@ input: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. If you wish to run interleaved paired-end data, supply as single-end data but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/nextflow_schema.json b/nextflow_schema.json index 04ba938..b96fdac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -107,8 +107,8 @@ "shortread_qc_adapterlist": { "type": "string", "default": "None", - "fa_icon": "fas fa-list-ul", - "help_text": "This affects AdapterRemoval. It will replace default adapters or whatever is supplied in `--shortread_qc_adapter{1,2}`.\n\nThis allows you to mix libraries built with different adapters into one run.\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--adapter-list`." + "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).", + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" }, "shortread_qc_mergepairs": { "type": "boolean", diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index d466041..cac5a27 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main' workflow SHORTREAD_FASTP { take: reads // [[meta], [reads]] + adapterlist main: ch_versions = Channel.empty() @@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP { paired: it[0]['single_end'] == false } - FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) + FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false ) // Last parameter here turns on merging of PE data - FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs ) + FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs ) if ( params.shortread_qc_mergepairs ) { ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index d381d34..c823e3d 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -18,7 +18,7 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = Channel.empty() if ( params.shortread_qc_tool == "fastp" ) { - ch_processed_reads = SHORTREAD_FASTP ( reads ).reads + ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_qc_tool == "adapterremoval" ) { diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e6ccc16..bf05d54 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -103,7 +103,12 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") - ch_adapterlist_for_shortreadqc = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + + if ( params.shortread_qc_adapterlist ) { + if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + } /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -134,10 +139,8 @@ workflow TAXPROFILER { SUBWORKFLOW: PERFORM PREPROCESSING */ - ch_adapterlist_for_shortreadqc - if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, ch_adapterlist_for_shortreadqc ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq From 24dc2a2cecc77b5a5beb0a16042814374f985df5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Nov 2022 10:03:07 +0100 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Moritz E. Beber --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b96fdac..7c7e8dc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -108,7 +108,7 @@ "type": "string", "default": "None", "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).", - "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" }, "shortread_qc_mergepairs": { "type": "boolean", From 62f45c8be7b1f592843cb2a4ca0c214e51beaf81 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Nov 2022 10:03:27 +0100 Subject: [PATCH 7/7] Update nextflow.config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9bf7f83..d11abd9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,7 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null - shortread_qc_adapterlist = null // TODO Add for FASTP + shortread_qc_adapterlist = null shortread_qc_minlength = 15 perform_longread_qc = false