From 76a662ecc5e6a63b569b887df52ac41d1946cb7d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 12:17:19 +0000 Subject: [PATCH 01/12] Fix adapterremoval adapter conflict and add optional adapter list file --- conf/modules.config | 8 +-- modules.json | 2 +- modules/nf-core/adapterremoval/main.nf | 4 +- nextflow.config | 3 +- nextflow_schema.json | 61 +++++++++++++++---- .../local/shortread_adapterremoval.nf | 5 +- subworkflows/local/shortread_preprocessing.nf | 1 + workflows/taxprofiler.nf | 9 ++- 8 files changed, 69 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b1d7b87..9a351cc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -100,8 +100,7 @@ process { withName: ADAPTERREMOVAL_SINGLE { ext.args = [ // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() @@ -119,9 +118,8 @@ process { // collapsing options params.shortread_qc_mergepairs ? "--collapse" : "", // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level + params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? 
"--adapter2 ${params.shortread_qc_adapter2}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() diff --git a/modules.json b/modules.json index aa77c4e..ab32eec 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "adapterremoval": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659" }, "bbmap/bbduk": { "branch": "master", diff --git a/modules/nf-core/adapterremoval/main.nf b/modules/nf-core/adapterremoval/main.nf index 0e17c05..643c141 100644 --- a/modules/nf-core/adapterremoval/main.nf +++ b/modules/nf-core/adapterremoval/main.nf @@ -34,7 +34,7 @@ process ADAPTERREMOVAL { AdapterRemoval \\ --file1 $reads \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads ${task.cpus} \\ --seed 42 \\ @@ -61,7 +61,7 @@ process ADAPTERREMOVAL { --file1 ${reads[0]} \\ --file2 ${reads[1]} \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads $task.cpus \\ --seed 42 \\ diff --git a/nextflow.config b/nextflow.config index b69948e..9bf7f83 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,9 +59,9 @@ params { // Databases databases = null + // FASTQ preprocessing preprocessing_qc_tool = 'fastqc' - // FASTQ preprocessing perform_shortread_qc = false shortread_qc_tool = 'fastp' shortread_qc_skipadaptertrim = false @@ -69,6 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null + shortread_qc_adapterlist = null // TODO Add for FASTP shortread_qc_minlength = 15 perform_longread_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 4479f54..59131b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir", "databases"], + "required": [ 
+ "input", + "databases", + "outdir" + ], "properties": { "input": { "type": "string", @@ -80,7 +84,10 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"], + "enum": [ + "fastp", + "adapterremoval" + ], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -104,6 +111,12 @@ "description": "Specify adapter 2 nucleotide sequence", "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" }, + "shortread_qc_adapterlist": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-list-ul", + "help_text": "This affects AdapterRemoval. It will replace default adapters or whatever is supplied in `--shortread_qc_adapter{1,2}`.\n\nThis allows you to mix libraries built with different adapters into one run.\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--adapter-list`." 
+ }, "shortread_qc_mergepairs": { "type": "boolean", "fa_icon": "fas fa-toggle-on", @@ -133,7 +146,11 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": ["bbduk", "prinseqplusplus", "fastp"], + "enum": [ + "bbduk", + "prinseqplusplus", + "fastp" + ], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -167,7 +184,10 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"], + "enum": [ + "entropy", + "dust" + ], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -341,7 +361,15 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], + "enum": [ + "blast", + "xml", + "txt", + "daa", + "sam", + "tsv", + "paf" + ], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. 
For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -360,7 +388,14 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": ["phylum", "class", "order", "family", "genus", "species"], + "enum": [ + "phylum", + "class", + "order", + "family", + "genus", + "species" + ], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -555,7 +590,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -711,10 +753,7 @@ "properties": { "preprocessing_qc_tool": { "type": "string", - "default": "fastqc", - "enum": ["fastqc", "falco"], - "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. 
We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", - "description": "Specify the tool used for quality control of raw sequencing reads" + "default": "fastqc" } } } diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a5a43fe..6567007 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { take: reads // [[meta], [reads]] + adapterlist // file main: ch_versions = Channel.empty() @@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL { paired: !it[0].single_end } - ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) - ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist ) + ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist ) /* * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f2cd738..6e14423 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -11,6 +11,7 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main' workflow SHORTREAD_PREPROCESSING { take: reads // [ [ meta ], [ reads ] ] + adapterlist // file main: ch_versions = Channel.empty() diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1ac519c..e6ccc16 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist def checkPathParamList = [ params.input, 
params.databases, params.hostremoval_reference, - params.shortread_hostremoval_index, params.multiqc_config + params.shortread_hostremoval_index, params.multiqc_config, + params.shortread_qc_adapterlist ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -102,6 +103,7 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") + ch_adapterlist_for_shortreadqc = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -131,8 +133,11 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ + + ch_adapterlist_for_shortreadqc + if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, ch_adapterlist_for_shortreadqc ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq From 733d3dc8c57078a7c2032ef028429aefc552e3de Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 14:19:28 +0200 Subject: [PATCH 02/12] Apply suggestions from code review --- nextflow_schema.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 59131b6..5c60775 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -753,7 +753,10 @@ "properties": { "preprocessing_qc_tool": { "type": "string", - "default": "fastqc" + "default": "fastqc", + "enum": ["fastqc", "falco"], + "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. 
We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads.", + "description": "Specify the tool used for quality control of raw sequencing reads" } } } From a402ee3be200089757096207dba17cb1b4ffb5a6 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 27 Oct 2022 12:20:08 +0000 Subject: [PATCH 03/12] [automated] Fix linting with Prettier --- nextflow_schema.json | 50 +++++++------------------------------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5c60775..04ba938 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "databases", - "outdir" - ], + "required": ["input", "databases", "outdir"], "properties": { "input": { "type": "string", @@ -84,10 +80,7 @@ "shortread_qc_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ], + "enum": ["fastp", "adapterremoval"], "fa_icon": "fas fa-tools", "description": "Specify which tool to use for short-read QC" }, @@ -146,11 +139,7 @@ "shortread_complexityfilter_tool": { "type": "string", "default": "bbduk", - "enum": [ - "bbduk", - "prinseqplusplus", - "fastp" - ], + "enum": ["bbduk", "prinseqplusplus", "fastp"], "fa_icon": "fas fa-hammer", "description": "Specify which tool to use for complexity filtering" }, @@ -184,10 +173,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ], + "enum": ["entropy", "dust"], "fa_icon": "fas fa-check-square", "description": "Specify the complexity filter mode for PRINSEQ++" }, @@ -361,15 +347,7 @@ "diamond_output_format": { "type": "string", "default": "tsv", - "enum": [ - "blast", - "xml", - "txt", - "daa", - "sam", - "tsv", - 
"paf" - ], + "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], "fa_icon": "fas fa-file", "description": "Specify output format from DIAMOND profiling.", "help_text": "DIAMOND can produce output in a number of different formats, you can specify here which to produce.\n\nNote that DIAMOND can only produce one format at a time, and depending on which you pick, some downstream steps may not be executed. For example, selecting `daa` or `sam` will mean you will not get a tabular taxonomic profile as with the other tools.\n\nWill be overriden by `--diamond_save_reads.`\n\n> Modifies tool parameter(s):\n> - diamond blastx: `--outfmt`" @@ -388,14 +366,7 @@ "kaiju_taxon_rank": { "type": "string", "default": "species", - "enum": [ - "phylum", - "class", - "order", - "family", - "genus", - "species" - ], + "enum": ["phylum", "class", "order", "family", "genus", "species"], "fa_icon": "fas fa-tag", "description": "Specify taxonomic rank to be displayed in Kaiju taxon table", "help_text": "Specify the taxonomic level(s) to be displayed in the resulting Kaiju taxon table, as generated by the kaiju2table helper tool.\n\nThis can be either a single level (e.g. `species`), or a comma separated list to display the full taxonomic path (e.g. `superkingdom,phylum,class,order,family,genus,species.`).\n\n> Modifies tool parameter(s):\n> - kaiju2table: `-l`" @@ -590,14 +561,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 08d61065e5fe4bbae15b93690bf77331be89785e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Oct 2022 12:23:27 +0000 Subject: [PATCH 04/12] Add adapterlist to AR2 subworkflow --- subworkflows/local/shortread_preprocessing.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 6e14423..d381d34 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -22,7 +22,7 @@ workflow SHORTREAD_PREPROCESSING { ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_qc_tool == "adapterremoval" ) { - ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) } else { From 1bd6dbfe58b16d3115ff214295dd6835a6a8f4d5 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 11:40:50 +0200 Subject: [PATCH 05/12] Add samtools stats for long-reads --- assets/multiqc_config.yml | 1 + modules.json | 8 ++++ modules/nf-core/samtools/index/main.nf | 48 ++++++++++++++++++++ modules/nf-core/samtools/index/meta.yml | 53 ++++++++++++++++++++++ modules/nf-core/samtools/stats/main.nf | 49 ++++++++++++++++++++ modules/nf-core/samtools/stats/meta.yml | 53 ++++++++++++++++++++++ subworkflows/local/longread_hostremoval.nf | 15 ++++++ workflows/taxprofiler.nf | 4 
++ 8 files changed, 231 insertions(+) create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 modules/nf-core/samtools/stats/main.nf create mode 100644 modules/nf-core/samtools/stats/meta.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e2b5a6e..e4a04a9 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -21,6 +21,7 @@ run_modules: - adapterRemoval - fastp - bowtie2 + - samtools - kraken - malt - custom_content diff --git a/modules.json b/modules.json index bcf2567..be3d193 100644 --- a/modules.json +++ b/modules.json @@ -153,6 +153,14 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "samtools/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, "samtools/view": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..e04e63e --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..e5cadbc --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 0000000..9b0c386 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..cac50b1 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf index 82ea8ca..fb79e12 100644 --- a/subworkflows/local/longread_hostremoval.nf +++ b/subworkflows/local/longread_hostremoval.nf @@ -6,6 +6,8 @@ include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/inde include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' workflow LONGREAD_HOSTREMOVAL { take: @@ -39,9 +41,22 @@ workflow LONGREAD_HOSTREMOVAL { SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false ) ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() ) + SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) + + SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .set { bam_bai } + + SAMTOOLS_STATS ( bam_bai, reference ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats ) + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index b456b06..07660ab 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -281,6 +281,10 @@ 
workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) } + if (params.perform_longread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + } + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) if ( params.run_profile_standardisation ) { From f87131c7b1c212c3169b0ee3d6462eaa72d2a021 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Fri, 28 Oct 2022 14:10:24 +0200 Subject: [PATCH 06/12] Save samtools stats output --- conf/modules.config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index a4d36fd..397e0c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,6 +233,16 @@ process { ] } + withName: SAMTOOLS_STATS { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + enabled: params.save_hostremoval_unmapped, + pattern: '*stats' + ] + } + withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", From f1778acf35a6b5e9e00601441e982acf401bdaec Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Mon, 31 Oct 2022 09:31:13 +0100 Subject: [PATCH 07/12] Apply review suggestion --- subworkflows/local/longread_hostremoval.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf index fb79e12..5f3f746 100644 --- a/subworkflows/local/longread_hostremoval.nf +++ b/subworkflows/local/longread_hostremoval.nf @@ -44,9 +44,8 @@ workflow LONGREAD_HOSTREMOVAL { SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam ) ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) - SAMTOOLS_VIEW.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .set { bam_bai } + 
bam_bai = SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_INDEX.out.bai, remainder: true) SAMTOOLS_STATS ( bam_bai, reference ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) From 3faea8aac574cd42ad3dd85f64f264521a274e18 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Tue, 1 Nov 2022 17:34:37 +0100 Subject: [PATCH 08/12] Fix porechop conflict --- modules.json | 4 ++-- .../nf-core/porechop/{ => porechop}/main.nf | 3 +-- .../nf-core/porechop/{ => porechop}/meta.yml | 2 +- subworkflows/local/longread_preprocessing.nf | 18 +++++++++--------- 4 files changed, 13 insertions(+), 14 deletions(-) rename modules/nf-core/porechop/{ => porechop}/main.nf (97%) rename modules/nf-core/porechop/{ => porechop}/meta.yml (98%) diff --git a/modules.json b/modules.json index be3d193..c0e0a3a 100644 --- a/modules.json +++ b/modules.json @@ -141,9 +141,9 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, - "porechop": { + "porechop/porechop": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "2a4e85eb81875a572bb58133e37f84ba3cc484d7" }, "prinseqplusplus": { "branch": "master", diff --git a/modules/nf-core/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf similarity index 97% rename from modules/nf-core/porechop/main.nf rename to modules/nf-core/porechop/porechop/main.nf index 77050bc..f946417 100644 --- a/modules/nf-core/porechop/main.nf +++ b/modules/nf-core/porechop/porechop/main.nf @@ -1,4 +1,4 @@ -process PORECHOP { +process PORECHOP_PORECHOP { tag "$meta.id" label 'process_medium' @@ -28,7 +28,6 @@ process PORECHOP { $args \\ -o ${prefix}.fastq.gz \\ > ${prefix}.log - cat <<-END_VERSIONS > versions.yml "${task.process}": porechop: \$( porechop --version ) diff --git a/modules/nf-core/porechop/meta.yml b/modules/nf-core/porechop/porechop/meta.yml similarity index 98% rename from modules/nf-core/porechop/meta.yml rename to modules/nf-core/porechop/porechop/meta.yml index e526317..98b838f 
100644 --- a/modules/nf-core/porechop/meta.yml +++ b/modules/nf-core/porechop/porechop/meta.yml @@ -1,4 +1,4 @@ -name: porechop +name: "porechop_porechop" description: Adapter removal and demultiplexing of Oxford Nanopore reads keywords: - adapter diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 3da49ce..961417d 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -5,7 +5,7 @@ include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main' include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main' -include { PORECHOP } from '../../modules/nf-core/porechop/main' +include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main' include { FILTLONG } from '../../modules/nf-core/filtlong/main' workflow LONGREAD_PREPROCESSING { @@ -17,9 +17,9 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = Channel.empty() if ( !params.longread_qc_skipadaptertrim && params.longread_qc_skipqualityfilter) { - PORECHOP ( reads ) + PORECHOP_PORECHOP ( reads ) - ch_processed_reads = PORECHOP.out.reads + ch_processed_reads = PORECHOP_PORECHOP.out.reads .map { meta, reads -> def meta_new = meta.clone() @@ -27,8 +27,8 @@ workflow LONGREAD_PREPROCESSING { [ meta_new, reads ] } - ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log ) + ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log ) } else if ( params.longread_qc_skipadaptertrim && !params.longread_qc_skipqualityfilter) { @@ -37,8 +37,8 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) } else { - PORECHOP ( reads ) - ch_clipped_reads = PORECHOP.out.reads + PORECHOP_PORECHOP ( reads ) + ch_clipped_reads = PORECHOP_PORECHOP.out.reads .map { meta, reads -> def meta_new = 
meta.clone() @@ -48,9 +48,9 @@ workflow LONGREAD_PREPROCESSING { ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads - ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log ) + ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log ) ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) } From e4307ad8abfa123d749cd70ce37cff486e804590 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli Date: Tue, 1 Nov 2022 20:50:42 +0100 Subject: [PATCH 09/12] Change modules.config to include porechop_porechop --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 397e0c2..dad18a2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -134,7 +134,7 @@ process { ] } - withName: PORECHOP { + withName: PORECHOP_PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ path: { "${params.outdir}/porechop" }, From d487a1ff2fac2e0a4bf9b3aecc41792000256b6f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 2 Nov 2022 12:39:34 +0100 Subject: [PATCH 10/12] Add FASTP adapter list --- conf/modules.config | 6 +++--- modules.json | 2 +- modules/nf-core/fastp/main.nf | 5 +++++ modules/nf-core/fastp/meta.yml | 4 ++++ nextflow_schema.json | 4 ++-- subworkflows/local/shortread_fastp.nf | 5 +++-- subworkflows/local/shortread_preprocessing.nf | 2 +- workflows/taxprofiler.nf | 11 +++++++---- 8 files changed, 26 insertions(+), 13 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9a351cc..f47e587 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,7 +62,7 @@ process { ext.args = [ // trimming options params.shortread_qc_skipadaptertrim ? 
"--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", // filtering options "--length_required ${params.shortread_qc_minlength}", (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' @@ -82,8 +82,8 @@ process { params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", // filtering options "--length_required ${params.shortread_qc_minlength}", params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? 
"--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' diff --git a/modules.json b/modules.json index ab32eec..3527a44 100644 --- a/modules.json +++ b/modules.json @@ -55,7 +55,7 @@ }, "fastp": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e" }, "fastqc": { "branch": "master", diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 11ea4db..207258a 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -9,6 +9,7 @@ process FASTP { input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -27,6 +28,7 @@ process FASTP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. 
@@ -40,6 +42,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log \\ @@ -61,6 +64,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log @@ -82,6 +86,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 2368fde..6f6fad7 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -23,6 +23,10 @@ input: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. If you wish to run interleaved paired-end data, supply as single-end data but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/nextflow_schema.json b/nextflow_schema.json index 04ba938..b96fdac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -107,8 +107,8 @@ "shortread_qc_adapterlist": { "type": "string", "default": "None", - "fa_icon": "fas fa-list-ul", - "help_text": "This affects AdapterRemoval. It will replace default adapters or whatever is supplied in `--shortread_qc_adapter{1,2}`.\n\nThis allows you to mix libraries built with different adapters into one run.\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--adapter-list`." + "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. 
Formats: .txt (AdapterRemoval) or .fasta. (fastp).", + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" }, "shortread_qc_mergepairs": { "type": "boolean", diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index d466041..cac5a27 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main' workflow SHORTREAD_FASTP { take: reads // [[meta], [reads]] + adapterlist main: ch_versions = Channel.empty() @@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP { paired: it[0]['single_end'] == false } - FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) + FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false ) // Last parameter here turns on merging of PE data - FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs ) + FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs ) if ( params.shortread_qc_mergepairs ) { ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged diff --git a/subworkflows/local/shortread_preprocessing.nf 
b/subworkflows/local/shortread_preprocessing.nf index d381d34..c823e3d 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -18,7 +18,7 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = Channel.empty() if ( params.shortread_qc_tool == "fastp" ) { - ch_processed_reads = SHORTREAD_FASTP ( reads ).reads + ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_qc_tool == "adapterremoval" ) { diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e6ccc16..bf05d54 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -103,7 +103,12 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") - ch_adapterlist_for_shortreadqc = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + + if ( params.shortread_qc_adapterlist ) { + if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). 
Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + } /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -134,10 +139,8 @@ workflow TAXPROFILER { SUBWORKFLOW: PERFORM PREPROCESSING */ - ch_adapterlist_for_shortreadqc - if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, ch_adapterlist_for_shortreadqc ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq From 24dc2a2cecc77b5a5beb0a16042814374f985df5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Nov 2022 10:03:07 +0100 Subject: [PATCH 11/12] Apply suggestions from code review Co-authored-by: Moritz E. Beber --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b96fdac..7c7e8dc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -108,7 +108,7 @@ "type": "string", "default": "None", "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).", - "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possibly combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. 
fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" }, "shortread_qc_mergepairs": { "type": "boolean", From 62f45c8be7b1f592843cb2a4ca0c214e51beaf81 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Nov 2022 10:03:27 +0100 Subject: [PATCH 12/12] Update nextflow.config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9bf7f83..d11abd9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,7 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null - shortread_qc_adapterlist = null // TODO Add for FASTP + shortread_qc_adapterlist = null shortread_qc_minlength = 15 perform_longread_qc = false