From 4529e7e5457df2de7557c6e8d7c1295f3c24daa7 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Thu, 1 Dec 2022 17:25:25 +0100 Subject: [PATCH 1/3] refactor: change parameter exclude unmerged to include --- .github/workflows/ci.yml | 4 ++-- conf/modules.config | 2 +- docs/usage.md | 2 +- nextflow.config | 2 +- nextflow_schema.json | 6 +++--- subworkflows/local/shortread_adapterremoval.nf | 4 ++-- workflows/taxprofiler.nf | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dad2838..11db52e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,10 +28,10 @@ jobs: - "--perform_longread_qc false" - "--perform_shortread_qc false" - "--shortread_qc_tool fastp" - - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged" + - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_includeunmerged" - "--shortread_qc_tool fastp --shortread_qc_mergepairs" - "--shortread_qc_tool adapterremoval" - - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged" + - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_includeunmerged" - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseqplusplus" diff --git a/conf/modules.config b/conf/modules.config index f6045e5..dd85c0c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -79,7 +79,7 @@ process { withName: FASTP_PAIRED { ext.args = [ // collapsing options - option to retain singletons - params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", + params.shortread_qc_includeunmerged ? '--include_unmerged' : '', // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? 
"--adapter_sequence ${params.shortread_qc_adapter1}" : "", diff --git a/docs/usage.md b/docs/usage.md index d21761f..d2f41bf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -204,7 +204,7 @@ It is highly recommended to run this on raw reads to remove artifacts from seque There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`. For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`) -By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`). +By default, paired-end merging is not activated and paired-end profiling is performed where supported; otherwise, pairs will be independently profiled. If paired-end merging is activated, you can also specify whether to include unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_includeunmerged`). You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`). Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain. 
diff --git a/nextflow.config b/nextflow.config index 3cf160d..54e220d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,7 +66,7 @@ params { shortread_qc_tool = 'fastp' shortread_qc_skipadaptertrim = false shortread_qc_mergepairs = true - shortread_qc_excludeunmerged = false + shortread_qc_includeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null shortread_qc_adapterlist = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ae4e311..350830c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -125,11 +125,11 @@ "default": true, "help_text": "Turn on the merging of read-pairs of paired-end short read sequencing data for AdapterRemoval (this is performed automatically with fastp).\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--collapse`\n" }, - "shortread_qc_excludeunmerged": { + "shortread_qc_includeunmerged": { "type": "boolean", "fa_icon": "far fa-times-circle", - "description": "Discard unmerged reads from paired-end merging", - "help_text": "Turns off the inclusion of unmerged reads in resulting processing FASTQ file of paired-end sequencing data when using `fastp`.\n\nThis can be useful in cases where you prefer to have very short reads (e.g. aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\n\n> Modifies tool parameter(s):\n> - removed from reads `--include_unmerged`\n" + "description": "Include unmerged reads from paired-end merging in the downstream processing", + "help_text": "Turns on the inclusion of unmerged reads in resulting processing FASTQ file of paired-end sequencing data when using `fastp`.\n\nExcluding unmerged reads can be useful in cases where you prefer to have very short reads (e.g. 
aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\n\n> Modifies tool parameter(s):\n> - adds option `--include_unmerged`\n" }, "shortread_qc_minlength": { "type": "integer", diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 6567007..4340e0d 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -30,7 +30,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { * has to be exported in a separate channel and we must manually recombine when necessary. */ - if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) { + if ( params.shortread_qc_mergepairs && params.shortread_qc_includeunmerged ) { ch_concat_fastq = Channel.empty() .mix( @@ -55,7 +55,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) - } else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) { + } else if ( params.shortread_qc_mergepairs && !params.shortread_qc_includeunmerged ) { ch_concat_fastq = Channel.empty() .mix( diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index ba52a4f..68fc47a 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -28,7 +28,7 @@ if ( params.input ) { if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." -if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. 
Please specify --shortread_qc_mergepairs" +if (params.shortread_qc_includeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging is not turned on. Please specify --shortread_qc_mergepairs" if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'" From b2088bcbb3bfc1057543df056783dcb3defdfd4a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sun, 4 Dec 2022 21:53:38 +0100 Subject: [PATCH 2/3] docs: improve description of `shortread_qc_includeunmerged` --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 350830c..02085d5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -128,8 +128,8 @@ "shortread_qc_includeunmerged": { "type": "boolean", "fa_icon": "far fa-times-circle", - "description": "Include unmerged reads from paired-end merging in the downstream processing", - "help_text": "Turns on the inclusion of unmerged reads in resulting processing FASTQ file of paired-end sequencing data when using `fastp`.\n\nExcluding unmerged reads can be useful in cases where you prefer to have very short reads (e.g. aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\n\n> Modifies tool parameter(s):\n> - adds option `--include_unmerged`\n" + "description": "Include unmerged reads from paired-end merging in the downstream analysis", + "help_text": "Turns on the inclusion of unmerged reads in the resulting FASTQ file from merging paired-end sequencing data when using `fastp` and/or `AdapterRemoval`. 
For `fastp`, this means the unmerged read pairs are directly included in the output FASTQ file. For `AdapterRemoval`, additional output files containing unmerged reads are all concatenated into one file by the workflow.\n\nExcluding unmerged reads can be useful in cases where you prefer to have very short reads (e.g. aDNA), thus excluding longer reads or possibly faulty reads where one of the pair was discarded.\n\n> Adds `fastp` option: `--include_unmerged`\n" }, "shortread_qc_minlength": { "type": "integer", From 24e5bc54101873f86a5e64610162769651ba8f26 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Sun, 4 Dec 2022 21:54:10 +0100 Subject: [PATCH 3/3] refactor: use shorter and more explicit syntax --- subworkflows/local/shortread_adapterremoval.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 4340e0d..7f5a0fb 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -40,9 +40,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { ADAPTERREMOVAL_PAIRED.out.paired_truncated ) .map { meta, reads -> - def meta_new = meta.clone() - meta_new.single_end = true - [meta_new, reads] + [meta + [single_end: true], reads] } .groupTuple() // Paired-end reads cause a nested tuple during grouping. @@ -63,9 +61,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) .map { meta, reads -> - def meta_new = meta.clone() - meta_new.single_end = true - [meta_new, reads] + [meta + [single_end: true], reads] } .groupTuple() .map { meta, fastq -> [meta, fastq.flatten()] }