From 76a662ecc5e6a63b569b887df52ac41d1946cb7d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 12:17:19 +0000 Subject: [PATCH] Fix adapterremoval adapter conflict and add optional adapter list file --- conf/modules.config | 8 +-- modules.json | 2 +- modules/nf-core/adapterremoval/main.nf | 4 +- nextflow.config | 3 +- nextflow_schema.json | 61 +++++++++++++++---- .../local/shortread_adapterremoval.nf | 5 +- subworkflows/local/shortread_preprocessing.nf | 1 + workflows/taxprofiler.nf | 9 ++- 8 files changed, 69 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b1d7b87..9a351cc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -100,8 +100,7 @@ process { withName: ADAPTERREMOVAL_SINGLE { ext.args = [ // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() @@ -119,9 +118,8 @@ process { // collapsing options params.shortread_qc_mergepairs ? "--collapse" : "", // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level + params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() diff --git a/modules.json b/modules.json index aa77c4e..ab32eec 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "adapterremoval": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659" }, "bbmap/bbduk": { "branch": "master", diff --git a/modules/nf-core/adapterremoval/main.nf b/modules/nf-core/adapterremoval/main.nf index 0e17c05..643c141 100644 --- a/modules/nf-core/adapterremoval/main.nf +++ b/modules/nf-core/adapterremoval/main.nf @@ -34,7 +34,7 @@ process ADAPTERREMOVAL { AdapterRemoval \\ --file1 $reads \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads ${task.cpus} \\ --seed 42 \\ @@ -61,7 +61,7 @@ process ADAPTERREMOVAL { --file1 ${reads[0]} \\ --file2 ${reads[1]} \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads $task.cpus \\ --seed 42 \\ diff --git a/nextflow.config b/nextflow.config index b69948e..9bf7f83 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,9 +59,9 @@ params { // Databases databases = null + // FASTQ preprocessing preprocessing_qc_tool = 'fastqc' - // FASTQ preprocessing perform_shortread_qc = false shortread_qc_tool = 'fastp' shortread_qc_skipadaptertrim = false @@ -69,6 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null + shortread_qc_adapterlist = null // TODO Add for FASTP shortread_qc_minlength = 15 perform_longread_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 4479f54..59131b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir", "databases"], + "required": [ + "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -104,6 +111,12 @@ "description": "Specify adapter 2 nucleotide sequence", "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" }, + "shortread_qc_adapterlist": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-list-ul", + "help_text": "This affects AdapterRemoval. It will replace default adapters or whatever is supplied in `--shortread_qc_adapter{1,2}`.\n\nThis allows you to mix libraries built with different adapters into one run.\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--adapter-list`." + }, "shortread_qc_mergepairs": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -133,7 +146,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +184,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +361,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +388,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -555,7 +590,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -711,10 +753,7 @@ "properties": { "preprocessing_qc_tool": { "type": "string", - "default": "fastqc", - "enum": ["fastqc", "falco"], - "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", - "description": "Specify the tool used for quality control of raw sequencing reads" + "default": "fastqc" } } } diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a5a43fe..6567007 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { take: reads // [[meta], [reads]] + adapterlist // file main: ch_versions = Channel.empty() @@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL { paired: !it[0].single_end } - ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) - ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist ) + ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist ) /* * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f2cd738..6e14423 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -11,6 +11,7 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main' workflow SHORTREAD_PREPROCESSING { take: reads // [ [ meta ], [ reads ] ] + adapterlist // file main: ch_versions = Channel.empty() diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1ac519c..e6ccc16 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference, - params.shortread_hostremoval_index, params.multiqc_config + params.shortread_hostremoval_index, params.multiqc_config, + params.shortread_qc_adapterlist ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -102,6 +103,7 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") + ch_adapterlist_for_shortreadqc = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -131,8 +133,11 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ + + ch_adapterlist_for_shortreadqc + if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, ch_adapterlist_for_shortreadqc ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq