1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 15:29:56 +00:00

Merge pull request #74 from nf-core/add-filtlong

Add initial longread QC filtering - requires filtlong module update
This commit is contained in:
James A. Fellows Yates 2022-06-02 20:10:46 +02:00 committed by GitHub
commit 43ea8d299c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 237 additions and 79 deletions

View file

@ -29,18 +29,18 @@ jobs:
- NXF_VER: "" - NXF_VER: ""
NXF_EDGE: "1" NXF_EDGE: "1"
parameters: parameters:
- "--perform_longread_clip false" - "--perform_longread_qc false"
- "--perform_shortread_clipmerge false" - "--perform_shortread_qc false"
- "--shortread_clipmerge_tool fastp" - "--shortread_qc_tool fastp"
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs" - "--shortread_qc_tool fastp --shortread_qc_mergepairs"
- "--shortread_clipmerge_tool adapterremoval" - "--shortread_qc_tool adapterremoval"
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
- "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool bbduk"
- "--shortread_complexityfilter_tool prinseqplusplus" - "--shortread_complexityfilter_tool prinseqplusplus"
- "--perform_runmerging" - "--perform_runmerging"
- "--perform_runmerging --shortread_clipmerge_mergepairs" - "--perform_runmerging --shortread_qc_mergepairs"
- "--shortread_complexityfilter false --perform_shortread_hostremoval" - "--shortread_complexityfilter false --perform_shortread_hostremoval"
steps: steps:

View file

@ -56,6 +56,8 @@
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176. > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
- [FILTLONG](https://github.com/rrwick/Filtlong)
## Software packaging/containerisation tools ## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com) - [Anaconda](https://anaconda.com)

View file

@ -51,10 +51,10 @@ process {
withName: FASTP_SINGLE { withName: FASTP_SINGLE {
ext.args = [ ext.args = [
// trimming options // trimming options
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options // filtering options
"--length_required ${params.shortread_clipmerge_minlength}", "--length_required ${params.shortread_qc_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim() ].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.prefix = { "${meta.id}_${meta.run_accession}" }
@ -69,13 +69,13 @@ process {
withName: FASTP_PAIRED { withName: FASTP_PAIRED {
ext.args = [ ext.args = [
// collapsing options - option to retain singletons // collapsing options - option to retain singletons
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
// trimming options // trimming options
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe", params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options // filtering options
"--length_required ${params.shortread_clipmerge_minlength}", "--length_required ${params.shortread_qc_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
].join(' ').trim() ].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.prefix = { "${meta.id}_${meta.run_accession}" }
@ -90,10 +90,10 @@ process {
withName: ADAPTERREMOVAL_SINGLE { withName: ADAPTERREMOVAL_SINGLE {
ext.args = [ ext.args = [
// trimming options // trimming options
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
// filtering options // filtering options
"--minlength ${params.shortread_clipmerge_minlength}" "--minlength ${params.shortread_qc_minlength}"
].join(' ').trim() ].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [ publishDir = [
@ -107,13 +107,13 @@ process {
withName: ADAPTERREMOVAL_PAIRED { withName: ADAPTERREMOVAL_PAIRED {
ext.args = [ ext.args = [
// collapsing options // collapsing options
params.shortread_clipmerge_mergepairs ? "--collapse" : "", params.shortread_qc_mergepairs ? "--collapse" : "",
// trimming options // trimming options
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "", params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
// filtering options // filtering options
"--minlength ${params.shortread_clipmerge_minlength}" "--minlength ${params.shortread_qc_minlength}"
].join(' ').trim() ].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [ publishDir = [
@ -134,6 +134,22 @@ process {
] ]
} }
withName: FILTLONG {
    // Filtlong thresholds, all driven by the longread_qc_* pipeline parameters:
    // minimum read length, percentage of best reads to keep, and total base target.
    ext.args = "--min_length ${params.longread_qc_minlength} --keep_percent ${params.longread_qc_keep_percent} --target_bases ${params.longread_qc_target_bases}".trim()

    // The '_filtered' suffix keeps the output name distinct from the input FASTQ name.
    ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }

    // Filtered reads are only published when the user asks to keep preprocessed reads.
    publishDir = [
        enabled: params.save_preprocessed_reads,
        pattern: '*.fastq.gz',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/filtlong" }
    ]
}
withName: BOWTIE2_BUILD { withName: BOWTIE2_BUILD {
publishDir = [ publishDir = [
path: { "${params.outdir}/bowtie2/build" }, path: { "${params.outdir}/bowtie2/build" },

View file

@ -24,8 +24,8 @@ params {
// TODO nf-core: Give any required params for the test so that command line flags are not needed // TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = true perform_shortread_qc = true
perform_longread_clip = false perform_longread_qc = true
perform_shortread_complexityfilter = true perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true perform_shortread_hostremoval = true
perform_longread_hostremoval = true perform_longread_hostremoval = true

View file

@ -24,8 +24,8 @@ params {
// TODO nf-core: Give any required params for the test so that command line flags are not needed // TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = false perform_shortread_qc = false
perform_longread_clip = false perform_longread_qc = false
perform_shortread_complexityfilter = false perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false perform_shortread_hostremoval = false
perform_longread_hostremoval = false perform_longread_hostremoval = false

View file

@ -24,8 +24,8 @@ params {
// TODO nf-core: Give any required params for the test so that command line flags are not needed // TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
perform_shortread_clipmerge = true perform_shortread_qc = true
perform_longread_clip = true perform_longread_qc = true
perform_shortread_complexityfilter = true perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true perform_shortread_hostremoval = true
perform_longread_hostremoval = true perform_longread_hostremoval = true

View file

@ -162,16 +162,16 @@ nf-core/taxprofiler offers four main preprocessing steps
#### Read Processing #### Read Processing
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags. Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles. It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`. There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`) For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`)
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`). By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`). You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain. Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
There is currently one option for long-read Oxford Nanopore processing: `porechop`. There is currently one option for long-read Oxford Nanopore processing: `porechop`.

View file

@ -36,6 +36,9 @@
"fastqc": { "fastqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
}, },
"filtlong": {
"git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1"
},
"kaiju/kaiju": { "kaiju/kaiju": {
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe" "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
}, },

37
modules/nf-core/modules/filtlong/main.nf generated Normal file
View file

@ -0,0 +1,37 @@
// FILTLONG: runs Filtlong on a long-read FASTQ and writes the filtered reads,
// gzip-compressed, to "<prefix>.fastq.gz" together with a versions.yml file.
process FILTLONG {
tag "$meta.id"
label 'process_low'
// Software is provided either via conda (when params.enable_conda is set) or a container.
conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
// Singularity uses the Galaxy depot image unless task.ext.singularity_pull_docker_container
// is set; every other case uses the quay.io biocontainer.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
// meta       : sample metadata map (meta.id and meta.single_end are read below)
// shortreads : optional short reads; an empty value disables the -1/-2 options
// longreads  : long-read FASTQ passed to filtlong as the positional input
input:
tuple val(meta), path(shortreads), path(longreads)
// reads    : [ meta, filtered *.fastq.gz ]
// versions : versions.yml recording the filtlong version
output:
tuple val(meta), path("*.fastq.gz"), emit: reads
path "versions.yml" , emit: versions
// Runs unless task.ext.when is set to a falsy value.
when:
task.ext.when == null || task.ext.when
script:
// Extra command-line options and output prefix come from the module configuration (task.ext).
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Build the short-read options: nothing when no short reads are given, -1 only for
// single-end data, -1 and -2 for paired-end data.
def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
// Abort when the output file name would be identical to the input long-read file name.
if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!"
"""
filtlong \\
$short_reads \\
$args \\
$longreads \\
| gzip -n > ${prefix}.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
END_VERSIONS
"""
}

View file

@ -0,0 +1,50 @@
name: filtlong
description: Filtlong filters long reads based on quality measures or short read data.
keywords:
- nanopore
- quality control
- QC
- filtering
- long reads
- short reads
tools:
- filtlong:
description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
homepage: https://anaconda.org/bioconda/filtlong
documentation: None
tool_dev_url: https://github.com/rrwick/Filtlong
doi: ""
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- shortreads:
type: file
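description: Optional short-read FASTQ file(s) passed to filtlong via -1/-2; may be an empty list when no short reads are used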
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- longreads:
type: file
description: Long-read FASTQ file to be filtered
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Filtered (compressed) fastq file
pattern: "*.fastq.gz"
authors:
- "@d4straub"

View file

@ -55,15 +55,22 @@ params {
databases = null databases = null
// FASTQ preprocessing // FASTQ preprocessing
perform_shortread_clipmerge = false perform_shortread_qc = false
shortread_clipmerge_tool = 'fastp' shortread_qc_tool = 'fastp'
shortread_clipmerge_skipadaptertrim = false shortread_qc_skipadaptertrim = false
shortread_clipmerge_mergepairs = false shortread_qc_mergepairs = false
shortread_clipmerge_excludeunmerged = false shortread_qc_excludeunmerged = false
shortread_clipmerge_adapter1 = null shortread_qc_adapter1 = null
shortread_clipmerge_adapter2 = null shortread_qc_adapter2 = null
shortread_clipmerge_minlength = 15 shortread_qc_minlength = 15
perform_longread_clip = false
perform_longread_qc = false
longread_qc_run_clip = false
longread_qc_run_filter = false
longread_qc_minlength = 1000
longread_qc_keep_percent = 90
longread_qc_target_bases = 500000000
save_preprocessed_reads = false save_preprocessed_reads = false
// Complexity filtering // Complexity filtering
@ -191,6 +198,7 @@ profiles {
} }
// Load igenomes.config if required // Load igenomes.config if required
if (!params.igenomes_ignore) { if (!params.igenomes_ignore) {
includeConfig 'conf/igenomes.config' includeConfig 'conf/igenomes.config'
} else { } else {

View file

@ -262,7 +262,7 @@
"type": "string", "type": "string",
"default": "None" "default": "None"
}, },
"shortread_clipmerge_excludeunmerged": { "shortread_qc_excludeunmerged": {
"type": "boolean" "type": "boolean"
}, },
"run_malt": { "run_malt": {
@ -291,26 +291,26 @@
"type": "boolean", "type": "boolean",
"description": "Enable MetaPhlAn for taxonomic profiling" "description": "Enable MetaPhlAn for taxonomic profiling"
}, },
"shortread_clipmerge_tool": { "shortread_qc_tool": {
"type": "string", "type": "string",
"default": "fastp", "default": "fastp",
"enum": ["fastp", "adapterremoval"] "enum": ["fastp", "adapterremoval"]
}, },
"shortread_clipmerge_skipadaptertrim": { "shortread_qc_skipadaptertrim": {
"type": "boolean" "type": "boolean"
}, },
"shortread_clipmerge_mergepairs": { "shortread_qc_mergepairs": {
"type": "boolean" "type": "boolean"
}, },
"shortread_clipmerge_adapter1": { "shortread_qc_adapter1": {
"type": "string", "type": "string",
"default": "None" "default": "None"
}, },
"shortread_clipmerge_adapter2": { "shortread_qc_adapter2": {
"type": "string", "type": "string",
"default": "None" "default": "None"
}, },
"shortread_clipmerge_minlength": { "shortread_qc_minlength": {
"type": "integer", "type": "integer",
"default": 15 "default": 15
}, },
@ -348,10 +348,10 @@
"save_runmerged_reads": { "save_runmerged_reads": {
"type": "boolean" "type": "boolean"
}, },
"perform_shortread_clipmerge": { "perform_shortread_qc": {
"type": "boolean" "type": "boolean"
}, },
"perform_longread_clip": { "perform_longread_qc": {
"type": "boolean" "type": "boolean"
}, },
"perform_shortread_complexityfilter": { "perform_shortread_complexityfilter": {
@ -409,6 +409,24 @@
"shortread_complexityfilter_fastp_threshold": { "shortread_complexityfilter_fastp_threshold": {
"type": "integer", "type": "integer",
"default": 30 "default": 30
},
"longread_qc_run_clip": {
"type": "boolean"
},
"longread_qc_run_filter": {
"type": "boolean"
},
"longread_qc_minlength": {
"type": "integer",
"default": 1000
},
"longread_qc_keep_percent": {
"type": "integer",
"default": 90
},
"longread_qc_target_bases": {
"type": "integer",
"default": 500000000
} }
} }
} }

View file

@ -4,6 +4,7 @@
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main' include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'
workflow LONGREAD_PREPROCESSING { workflow LONGREAD_PREPROCESSING {
take: take:
@ -13,6 +14,7 @@ workflow LONGREAD_PREPROCESSING {
ch_versions = Channel.empty() ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty() ch_multiqc_files = Channel.empty()
if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
PORECHOP ( reads ) PORECHOP ( reads )
ch_processed_reads = PORECHOP.out.reads ch_processed_reads = PORECHOP.out.reads
@ -21,12 +23,33 @@ workflow LONGREAD_PREPROCESSING {
def meta_new = meta.clone() def meta_new = meta.clone()
meta_new['single_end'] = 1 meta_new['single_end'] = 1
[ meta_new, reads ] [ meta_new, reads ]
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
}
} else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
// FILTLONG declares two outputs (reads, versions); pick the `reads` channel explicitly
// so that ch_processed_reads is a plain [ meta, reads ] channel.
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} ).reads
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
} else {
PORECHOP ( reads )
ch_clipped_reads = PORECHOP.out.reads
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
} }
FASTQC_PROCESSED ( PORECHOP.out.reads ) ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
}
FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
* has to be exported in a separate channel and we must manually recombine when necessary. * has to be exported in a separate channel and we must manually recombine when necessary.
*/ */
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {
ch_concat_fastq = Channel.empty() ch_concat_fastq = Channel.empty()
.mix( .mix(
@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { } else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {
ch_concat_fastq = Channel.empty() ch_concat_fastq = Channel.empty()
.mix( .mix(

View file

@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP {
FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
// Last parameter here turns on merging of PE data // Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs ) FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
if ( params.shortread_clipmerge_mergepairs ) { if ( params.shortread_qc_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
.map { .map {
meta, reads -> meta, reads ->

View file

@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING {
ch_versions = Channel.empty() ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty() ch_multiqc_files = Channel.empty()
if ( params.shortread_clipmerge_tool == "fastp" ) { if ( params.shortread_qc_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { } else if ( params.shortread_qc_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )

View file

@ -20,10 +20,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
// The long-read clip/filter sub-options only take effect when long-read QC itself is enabled,
// so fail early and point the user at the actual switch (--perform_longread_qc).
if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_longread_qc"
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_clipmerge == false || params.shortread_clipmerge_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_clipmerge and/or --shortread_clipmerge_tool 'fastp'" if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." } if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." } if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }
@ -116,14 +117,14 @@ workflow TAXPROFILER {
/* /*
SUBWORKFLOW: PERFORM PREPROCESSING SUBWORKFLOW: PERFORM PREPROCESSING
*/ */
if ( params.perform_shortread_clipmerge ) { if ( params.perform_shortread_qc ) {
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
} else { } else {
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
} }
if ( params.perform_longread_clip ) { if ( params.perform_longread_qc ) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
.map { it -> [ it[0], [it[1]] ] } .map { it -> [ it[0], [it[1]] ] }
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
@ -227,11 +228,11 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([]))
if (params.perform_shortread_clipmerge) { if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
} }
if (params.perform_longread_clip) { if (params.perform_longread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
} }