From 031cb45934cc3e66909d6cdd7b38527508353250 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 1 May 2022 07:18:14 +0200 Subject: [PATCH 1/5] Add initial longread QC filtering - requires filtlong module update --- .github/workflows/ci.yml | 18 +++---- CITATIONS.md | 2 + conf/modules.config | 48 ++++++++++++------ conf/test.config | 4 +- docs/usage.md | 10 ++-- modules.json | 3 ++ modules/nf-core/modules/filtlong/main.nf | 36 +++++++++++++ modules/nf-core/modules/filtlong/meta.yml | 50 +++++++++++++++++++ nextflow.config | 28 +++++++---- nextflow_schema.json | 36 +++++++++---- subworkflows/local/longread_preprocessing.nf | 45 +++++++++++++---- .../local/shortread_adapterremoval.nf | 4 +- subworkflows/local/shortread_fastp.nf | 4 +- subworkflows/local/shortread_preprocessing.nf | 4 +- workflows/taxprofiler.nf | 15 +++--- 15 files changed, 233 insertions(+), 74 deletions(-) create mode 100644 modules/nf-core/modules/filtlong/main.nf create mode 100644 modules/nf-core/modules/filtlong/meta.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1ece72..cb531b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,18 +29,18 @@ jobs: - NXF_VER: "" NXF_EDGE: "1" parameters: - - "--perform_longread_clip false" - - "--perform_shortread_clipmerge false" - - "--shortread_clipmerge_tool fastp" - - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs" - - "--shortread_clipmerge_tool adapterremoval" - - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" + - "--perform_longread_qc false" + - "--perform_shortread_qc false" + - "--shortread_qc_tool fastp" + - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged" + - 
"--shortread_qc_tool fastp --shortread_qc_mergepairs" + - "--shortread_qc_tool adapterremoval" + - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged" + - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" - "--perform_runmerging" - - "--perform_runmerging --shortread_clipmerge_mergepairs" + - "--perform_runmerging --shortread_qc_mergepairs" - "--shortread_complexityfilter false --perform_shortread_hostremoval" steps: diff --git a/CITATIONS.md b/CITATIONS.md index fd8c52a..8044658 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -56,6 +56,8 @@ > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176. +- [FILTLONG](https://github.com/rrwick/Filtlong) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/conf/modules.config b/conf/modules.config index d8fb382..8481e7b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -51,10 +51,10 @@ process { withName: FASTP_SINGLE { ext.args = [ // trimming options - params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", // filtering options - "--length_required ${params.shortread_clipmerge_minlength}" + "--length_required ${params.shortread_qc_minlength}" ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -68,13 +68,13 @@ process { withName: FASTP_PAIRED { ext.args = [ // collapsing options - option to retain singletons - params.shortread_clipmerge_excludeunmerged ? 
'' : "--include_unmerged", + params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", // trimming options - params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", - params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe", + params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", // filtering options - "--length_required ${params.shortread_clipmerge_minlength}" + "--length_required ${params.shortread_qc_minlength}" ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -88,10 +88,10 @@ process { withName: ADAPTERREMOVAL_SINGLE { ext.args = [ // trimming options - params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // filtering options - "--minlength ${params.shortread_clipmerge_minlength}" + "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -105,13 +105,13 @@ process { withName: ADAPTERREMOVAL_PAIRED { ext.args = [ // collapsing options - params.shortread_clipmerge_mergepairs ? "--collapse" : "", + params.shortread_qc_mergepairs ? "--collapse" : "", // trimming options - params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_clipmerge_adapter1 ? 
"--adapter1 ${params.shortread_clipmerge_adapter1}" : "", - params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", // filtering options - "--minlength ${params.shortread_clipmerge_minlength}" + "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -132,6 +132,22 @@ process { ] } + withName: FILTLONG { + ext.args = [ + "--min_length ${params.longread_qc_minlength}", + "--keep_percent ${params.longread_qc_keepbppercent}", + "--target_bases ${params.longread_qc_targetnbases}" + ] + .join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/porechop" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads + ] + } + withName: BOWTIE2_BUILD { publishDir = [ path: { "${params.outdir}/bowtie2/build" }, diff --git a/conf/test.config b/conf/test.config index a2464b2..ac7b0d3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,8 +24,8 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - perform_shortread_clipmerge = true - perform_longread_clip = false + perform_shortread_qc = true + perform_longread_qc = false perform_shortread_complexityfilter = true perform_shortread_hostremoval = true shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' diff --git a/docs/usage.md 
b/docs/usage.md index cee2bb6..34af0eb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,16 +162,16 @@ nf-core/taxprofiler offers four main preprocessing steps #### Read Processing -Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags. +Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags. It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles. There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`. -For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`) -By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`). -You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`). -Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain. 
+For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`) +By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`). +You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`). +Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain. There is currently one option for long-read Oxford Nanopore processing: `porechop`. diff --git a/modules.json b/modules.json index a65926c..21c6a89 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,9 @@ "fastqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "filtlong": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" }, diff --git a/modules/nf-core/modules/filtlong/main.nf b/modules/nf-core/modules/filtlong/main.nf new file mode 100644 index 0000000..0e6fdd5 --- /dev/null +++ b/modules/nf-core/modules/filtlong/main.nf @@ -0,0 +1,36 @@ +process FILTLONG { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' : + 'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }" + + input: + tuple val(meta), path(shortreads), path(longreads) + + output: + tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + """ + filtlong \\ + $short_reads \\ + $args \\ + $longreads \\ + | gzip -n > ${prefix}_lr_filtlong.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/filtlong/meta.yml b/modules/nf-core/modules/filtlong/meta.yml new file mode 100644 index 0000000..b3626e6 --- /dev/null +++ b/modules/nf-core/modules/filtlong/meta.yml @@ -0,0 +1,50 @@ +name: filtlong +description: Filtlong filters long reads based on quality measures or short read data. +keywords: + - nanopore + - quality control + - QC + - filtering + - long reads + - short reads +tools: + - filtlong: + description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter. + homepage: https://anaconda.org/bioconda/filtlong + documentation: None + tool_dev_url: https://github.com/rrwick/Filtlong + doi: "" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - shortreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - longreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Filtered (compressed) fastq file + pattern: "*.fastq.gz" + +authors: + - "@d4straub" diff --git a/nextflow.config b/nextflow.config index 5644786..145e53b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,16 +55,23 @@ params { databases = null // FASTQ preprocessing - perform_shortread_clipmerge = false - shortread_clipmerge_tool = 'fastp' - shortread_clipmerge_skipadaptertrim = false - shortread_clipmerge_mergepairs = false - shortread_clipmerge_excludeunmerged = false - shortread_clipmerge_adapter1 = null - shortread_clipmerge_adapter2 = null - shortread_clipmerge_minlength = 15 - perform_longread_clip = false - save_preprocessed_reads = false + perform_shortread_qc = false + shortread_qc_tool = 'fastp' + shortread_qc_skipadaptertrim = false + shortread_qc_mergepairs = false + shortread_qc_excludeunmerged = false + shortread_qc_adapter1 = null + shortread_qc_adapter2 = null + shortread_qc_minlength = 15 + + perform_longread_qc = false + longread_qc_run_clip = false + longread_qc_run_filter = false + longread_qc_minlength = 1000 + longread_qc_keepbppercent = 90 + longread_qc_targetnbases = 500000000 + + save_preprocessed_reads = false // Complexity filtering perform_shortread_complexityfilter = false @@ -185,6 +192,7 @@ profiles { } // Load igenomes.config if required + if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } else { diff --git a/nextflow_schema.json b/nextflow_schema.json index f429d1b..1a590bb 100644 --- a/nextflow_schema.json +++ 
b/nextflow_schema.json @@ -262,7 +262,7 @@ "type": "string", "default": "None" }, - "shortread_clipmerge_excludeunmerged": { + "shortread_qc_excludeunmerged": { "type": "boolean" }, "run_malt": { @@ -291,26 +291,26 @@ "type": "boolean", "description": "Enable MetaPhlAn for taxonomic profiling" }, - "shortread_clipmerge_tool": { + "shortread_qc_tool": { "type": "string", "default": "fastp", "enum": ["fastp", "adapterremoval"] }, - "shortread_clipmerge_skipadaptertrim": { + "shortread_qc_skipadaptertrim": { "type": "boolean" }, - "shortread_clipmerge_mergepairs": { + "shortread_qc_mergepairs": { "type": "boolean" }, - "shortread_clipmerge_adapter1": { + "shortread_qc_adapter1": { "type": "string", "default": "None" }, - "shortread_clipmerge_adapter2": { + "shortread_qc_adapter2": { "type": "string", "default": "None" }, - "shortread_clipmerge_minlength": { + "shortread_qc_minlength": { "type": "integer", "default": 15 }, @@ -347,10 +347,10 @@ "save_runmerged_reads": { "type": "boolean" }, - "perform_shortread_clipmerge": { + "perform_shortread_qc": { "type": "boolean" }, - "perform_longread_clip": { + "perform_longread_qc": { "type": "boolean" }, "perform_shortread_complexityfilter": { @@ -397,6 +397,24 @@ "type": "string", "default": "tsv", "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"] + }, + "longread_qc_run_clip": { + "type": "boolean" + }, + "longread_qc_run_filter": { + "type": "boolean" + }, + "longread_qc_minlength": { + "type": "integer", + "default": 1000 + }, + "longread_qc_keepbppercent": { + "type": "integer", + "default": 90 + }, + "longread_qc_targetnbases": { + "type": "integer", + "default": 500000000 } } } diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 2fa5f3b..5ae5417 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -4,6 +4,7 @@ include { FASTQC as FASTQC_PROCESSED } from 
'../../modules/nf-core/modules/fastqc/main' include { PORECHOP } from '../../modules/nf-core/modules/porechop/main' +include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main' workflow LONGREAD_PREPROCESSING { take: @@ -13,21 +14,43 @@ workflow LONGREAD_PREPROCESSING { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - PORECHOP ( reads ) + if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) { + PORECHOP ( reads ) - ch_processed_reads = PORECHOP.out.reads - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] - } + ch_processed_reads = PORECHOP.out.reads + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] - FASTQC_PROCESSED ( PORECHOP.out.reads ) - ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + } + } else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) { + + ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} ) + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + + } else { + PORECHOP ( reads ) + ch_clipped_reads = PORECHOP.out.reads + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] + } + + ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads + + ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + + } + + FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) - emit: reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b573be9..e491423 100644 --- 
a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { * has to be exported in a separate channel and we must manually recombine when necessary. */ - if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { + if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) { ch_concat_fastq = Channel.empty() .mix( @@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) - } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { + } else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) { ch_concat_fastq = Channel.empty() .mix( diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 6fed2ae..05e0f3d 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP { FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) // Last parameter here turns on merging of PE data - FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs ) + FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs ) - if ( params.shortread_clipmerge_mergepairs ) { + if ( params.shortread_qc_mergepairs ) { ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged .map { meta, reads -> diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index b0ac25e..977a317 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - if ( params.shortread_clipmerge_tool == "fastp" ) { + if ( 
params.shortread_qc_tool == "fastp" ) { ch_processed_reads = SHORTREAD_FASTP ( reads ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) - } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { + } else if ( params.shortread_qc_tool == "adapterremoval" ) { ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index a0046b2..2279fd5 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -19,8 +19,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } -if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." -if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" + +if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-short reads. Pairs will be profiled as separate files." +if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. 
Please specify --shortread_qc_mergepairs" + +if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) & !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_long_qc" if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." } if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } @@ -110,14 +113,14 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ - if ( params.perform_shortread_clipmerge ) { + if ( params.perform_shortread_qc ) { ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq } - if ( params.perform_longread_clip ) { + if ( params.perform_longread_qc ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) @@ -211,11 +214,11 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.perform_shortread_clipmerge) { + if (params.perform_shortread_qc) { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) } - if (params.perform_longread_clip) { + if (params.perform_longread_qc) { ch_multiqc_files = ch_multiqc_files.mix( 
LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) } From 792e2d019b94eb0d1f43da355350f8a066ecf0a2 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 13:06:48 +0200 Subject: [PATCH 2/5] Fix merge error in schema --- nextflow_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 179daea..c38e5d1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -427,5 +427,6 @@ "longread_qc_targetnbases": { "type": "integer", "default": 500000000 + } } } From 16ec5bf74af50fbdcf675463d6ab030ae00d56b7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:11:19 +0200 Subject: [PATCH 3/5] Update parameter names --- conf/modules.config | 8 ++++---- conf/test_nopreprocessing.config | 4 ++-- conf/test_noprofiling.config | 4 ++-- modules.json | 2 +- modules/nf-core/modules/filtlong/main.nf | 7 ++++--- workflows/taxprofiler.nf | 6 +++--- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 797a209..164fe91 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -54,7 +54,7 @@ process { params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", // filtering options - "--length_required ${params.shortread_clipmerge_minlength}", + "--length_required ${params.shortread_qc_minlength}", (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } @@ -75,7 +75,7 @@ process { params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", params.shortread_qc_adapter2 ? 
"--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", // filtering options - "--length_required ${params.shortread_clipmerge_minlength}", + "--length_required ${params.shortread_qc_minlength}", params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' ].join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}" } @@ -141,9 +141,9 @@ process { "--target_bases ${params.longread_qc_targetnbases}" ] .join(' ').trim() - ext.prefix = { "${meta.id}_${meta.run_accession}" } + ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" } publishDir = [ - path: { "${params.outdir}/porechop" }, + path: { "${params.outdir}/filtlong" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', enabled: params.save_preprocessed_reads diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index e8d4ed9..60cdde8 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -24,8 +24,8 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - perform_shortread_clipmerge = false - perform_longread_clip = false + perform_shortread_qc = false + perform_longread_qc = false perform_shortread_complexityfilter = false perform_shortread_hostremoval = false perform_longread_hostremoval = false diff --git a/conf/test_noprofiling.config b/conf/test_noprofiling.config index f908651..379aaae 100644 --- a/conf/test_noprofiling.config +++ b/conf/test_noprofiling.config @@ -24,8 +24,8 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - perform_shortread_clipmerge = true - perform_longread_clip = true + perform_shortread_qc = true + perform_longread_qc = true perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true diff --git a/modules.json b/modules.json index 9520707..b27939b 100644 --- a/modules.json +++ b/modules.json @@ -37,7 +37,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "filtlong": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "723852bf3d3b12059b2f53da8bc055206f3019d7" }, "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" diff --git a/modules/nf-core/modules/filtlong/main.nf b/modules/nf-core/modules/filtlong/main.nf index 0e6fdd5..9dbf05b 100644 --- a/modules/nf-core/modules/filtlong/main.nf +++ b/modules/nf-core/modules/filtlong/main.nf @@ -11,7 +11,7 @@ process FILTLONG { tuple val(meta), path(shortreads), path(longreads) output: - tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads + tuple val(meta), path("*.fastq.gz"), emit: reads path "versions.yml" , emit: versions when: @@ -20,13 +20,14 @@ process FILTLONG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!" 
""" filtlong \\ $short_reads \\ $args \\ $longreads \\ - | gzip -n > ${prefix}_lr_filtlong.fastq.gz + | gzip -n > ${prefix}.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index a6ddaa3..1c48fd6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -20,11 +20,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } -if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." -if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" +if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." +if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs" if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) & !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. 
Please specify --perform_longread_qc" -if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_clipmerge == false || params.shortread_clipmerge_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_clipmerge and/or --shortread_clipmerge_tool 'fastp'" +if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'" if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." } if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." 
} From fa70a413cb85cc10c63b47195fbdb5b3c4e49192 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 21:12:09 +0200 Subject: [PATCH 4/5] Use finally working filtlong module --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index b27939b..fe25bed 100644 --- a/modules.json +++ b/modules.json @@ -37,7 +37,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "filtlong": { - "git_sha": "723852bf3d3b12059b2f53da8bc055206f3019d7" + "git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1" }, "kaiju/kaiju": { "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" From d3d28da1b18d60fa406e9c175f25d49396245084 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 11:53:53 +0200 Subject: [PATCH 5/5] Update after review --- conf/modules.config | 4 ++-- conf/test.config | 4 ++-- nextflow.config | 4 ++-- nextflow_schema.json | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 164fe91..09b2ed9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -137,8 +137,8 @@ process { withName: FILTLONG { ext.args = [ "--min_length ${params.longread_qc_minlength}", - "--keep_percent ${params.longread_qc_keepbppercent}", - "--target_bases ${params.longread_qc_targetnbases}" + "--keep_percent ${params.longread_qc_keep_percent}", + "--target_bases ${params.longread_qc_target_bases}" ] .join(' ').trim() ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" } diff --git a/conf/test.config b/conf/test.config index 6af771c..d6fc7fe 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,8 +24,8 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - perform_shortread_qc = 
true - perform_longread_qc = false + perform_shortread_qc = true + perform_longread_qc = true perform_shortread_complexityfilter = true perform_shortread_hostremoval = true perform_longread_hostremoval = true diff --git a/nextflow.config b/nextflow.config index 5e9b385..d8f80ab 100644 --- a/nextflow.config +++ b/nextflow.config @@ -68,8 +68,8 @@ params { longread_qc_run_clip = false longread_qc_run_filter = false longread_qc_minlength = 1000 - longread_qc_keepbppercent = 90 - longread_qc_targetnbases = 500000000 + longread_qc_keep_percent = 90 + longread_qc_target_bases = 500000000 save_preprocessed_reads = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c38e5d1..e3cc558 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -420,11 +420,11 @@ "type": "integer", "default": 1000 }, - "longread_qc_keepbppercent": { + "longread_qc_keep_percent": { "type": "integer", "default": 90 }, - "longread_qc_targetnbases": { + "longread_qc_target_bases": { "type": "integer", "default": 500000000 }