Mirror of https://github.com/MillironX/taxprofiler.git (synced 2024-11-22 06:29:55 +00:00)

Add initial longread QC filtering - requires filtlong module update

Commit 031cb45934, parent 2bae9d58ee.

15 changed files with 233 additions and 74 deletions.
.github/workflows/ci.yml: 18 changes (vendored)

@@ -29,18 +29,18 @@ jobs:
          - NXF_VER: ""
            NXF_EDGE: "1"
        parameters:
-         - "--perform_longread_clip false"
+         - "--perform_longread_qc false"
-         - "--perform_shortread_clipmerge false"
+         - "--perform_shortread_qc false"
-         - "--shortread_clipmerge_tool fastp"
+         - "--shortread_qc_tool fastp"
-         - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+         - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
-         - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
+         - "--shortread_qc_tool fastp --shortread_qc_mergepairs"
-         - "--shortread_clipmerge_tool adapterremoval"
+         - "--shortread_qc_tool adapterremoval"
-         - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+         - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
-         - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
+         - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
          - "--shortread_complexityfilter_tool bbduk"
          - "--shortread_complexityfilter_tool prinseq"
          - "--perform_runmerging"
-         - "--perform_runmerging --shortread_clipmerge_mergepairs"
+         - "--perform_runmerging --shortread_qc_mergepairs"
          - "--shortread_complexityfilter false --perform_shortread_hostremoval"

    steps:
@@ -56,6 +56,8 @@

 > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.

+- [FILTLONG](https://github.com/rrwick/Filtlong)
+
 ## Software packaging/containerisation tools

 - [Anaconda](https://anaconda.com)
@@ -51,10 +51,10 @@ process {
    withName: FASTP_SINGLE {
        ext.args = [
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
+           params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
            // filtering options
-           "--length_required ${params.shortread_clipmerge_minlength}"
+           "--length_required ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -68,13 +68,13 @@ process {
    withName: FASTP_PAIRED {
        ext.args = [
            // collapsing options - option to retain singletons
-           params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
+           params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
+           params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
-           params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
+           params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
            // filtering options
-           "--length_required ${params.shortread_clipmerge_minlength}"
+           "--length_required ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -88,10 +88,10 @@ process {
    withName: ADAPTERREMOVAL_SINGLE {
        ext.args = [
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+           params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
            // filtering options
-           "--minlength ${params.shortread_clipmerge_minlength}"
+           "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -105,13 +105,13 @@ process {
    withName: ADAPTERREMOVAL_PAIRED {
        ext.args = [
            // collapsing options
-           params.shortread_clipmerge_mergepairs ? "--collapse" : "",
+           params.shortread_qc_mergepairs ? "--collapse" : "",
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+           params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
-           params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
+           params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
            // filtering options
-           "--minlength ${params.shortread_clipmerge_minlength}"
+           "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -132,6 +132,22 @@ process {
        ]
    }

+   withName: FILTLONG {
+       ext.args = [
+           "--min_length ${params.longread_qc_minlength}",
+           "--keep_percent ${params.longread_qc_keepbppercent}",
+           "--target_bases ${params.longread_qc_targetnbases}"
+       ]
+           .join(' ').trim()
+       ext.prefix = { "${meta.id}_${meta.run_accession}" }
+       publishDir = [
+           path: { "${params.outdir}/porechop" },
+           mode: params.publish_dir_mode,
+           pattern: '*.fastq.gz',
+           enabled: params.save_preprocessed_reads
+       ]
+   }
+
    withName: BOWTIE2_BUILD {
        publishDir = [
            path: { "${params.outdir}/bowtie2/build" },
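The `ext.args` blocks above all follow the same pattern: a list of ternary expressions joined with `.join(' ').trim()`, so parameters left unset simply drop out of the final command line. A minimal plain-Groovy sketch of how the new FILTLONG arguments resolve; the `params` map below only mirrors the defaults added to the pipeline configuration in this commit and is an illustration, not pipeline code:

```groovy
// Plain-Groovy sketch (not pipeline code): how the list-of-ternaries plus
// join(' ').trim() pattern collapses into a single argument string.
def params = [
    longread_qc_minlength    : 1000,       // mirrors defaults added in this commit
    longread_qc_keepbppercent: 90,
    longread_qc_targetnbases : 500000000
]

def filtlong_args = [
    "--min_length ${params.longread_qc_minlength}",
    "--keep_percent ${params.longread_qc_keepbppercent}",
    "--target_bases ${params.longread_qc_targetnbases}"
].join(' ').trim()

assert filtlong_args == '--min_length 1000 --keep_percent 90 --target_bases 500000000'
```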
@@ -24,8 +24,8 @@ params {
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
    input     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-   perform_shortread_clipmerge = true
+   perform_shortread_qc = true
-   perform_longread_clip = false
+   perform_longread_qc = false
    perform_shortread_complexityfilter = true
    perform_shortread_hostremoval = true
    shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
@@ -162,16 +162,16 @@ nf-core/taxprofiler offers four main preprocessing steps

 #### Read Processing

-Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags.
+Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.

 It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.

 There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.

-For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`)
+For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`)
-By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`).
+By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
-You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`).
+You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
-Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
+Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.

 There is currently one option for long-read Oxford Nanopore processing: `porechop`.
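As a rough illustration of the renamed flags described in the documentation hunk above, a hypothetical user configuration might look like the sketch below. The file name and values are placeholders, not an example taken from the docs:

```groovy
// Hypothetical qc.config, passed to the pipeline with `-c qc.config`;
// values are placeholders chosen for illustration only.
params {
    perform_shortread_qc         = true
    shortread_qc_tool            = 'fastp'
    shortread_qc_mergepairs      = true
    shortread_qc_excludeunmerged = false
    shortread_qc_minlength       = 15
    perform_longread_qc          = true
}
```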
@@ -36,6 +36,9 @@
        "fastqc": {
            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
        },
+       "filtlong": {
+           "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+       },
        "kaiju/kaiju": {
            "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
        },
modules/nf-core/modules/filtlong/main.nf: 36 additions (new file, generated)

@@ -0,0 +1,36 @@
+process FILTLONG {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
+        'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
+
+    input:
+    tuple val(meta), path(shortreads), path(longreads)
+
+    output:
+    tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads
+    path "versions.yml"                                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
+    """
+    filtlong \\
+        $short_reads \\
+        $args \\
+        $longreads \\
+        | gzip -n > ${prefix}_lr_filtlong.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
+    END_VERSIONS
+    """
+}
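The module expects a single input tuple of `[ meta, shortreads, longreads ]`. A minimal, hypothetical DSL2 sketch of calling it the way the long-read subworkflow further down does, i.e. with an empty list in the short-read slot; the include path, sample name, and file are made up for illustration:

```groovy
// Hypothetical DSL2 wrapper (illustrative only); the include path assumes the
// module sits where this commit installs it.
include { FILTLONG } from './modules/nf-core/modules/filtlong/main'

workflow {
    // One long-read sample; meta map and file path are placeholders.
    ch_longreads = Channel.of(
        [ [ id:'sample1', run_accession:'run1', single_end:true ], file('sample1_nanopore.fastq.gz') ]
    )

    // Empty list in the short-read slot, as done in LONGREAD_PREPROCESSING below.
    FILTLONG ( ch_longreads.map { meta, reads -> [ meta, [], reads ] } )
    FILTLONG.out.reads.view()
}
```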
modules/nf-core/modules/filtlong/meta.yml: 50 additions (new file, generated)

@@ -0,0 +1,50 @@
+name: filtlong
+description: Filtlong filters long reads based on quality measures or short read data.
+keywords:
+  - nanopore
+  - quality control
+  - QC
+  - filtering
+  - long reads
+  - short reads
+tools:
+  - filtlong:
+      description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
+      homepage: https://anaconda.org/bioconda/filtlong
+      documentation: None
+      tool_dev_url: https://github.com/rrwick/Filtlong
+      doi: ""
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - shortreads:
+      type: file
+      description: fastq file
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+  - longreads:
+      type: file
+      description: fastq file
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Filtered (compressed) fastq file
+      pattern: "*.fastq.gz"
+
+authors:
+  - "@d4straub"
@@ -55,16 +55,23 @@ params {
    databases = null

    // FASTQ preprocessing
-   perform_shortread_clipmerge = false
+   perform_shortread_qc = false
-   shortread_clipmerge_tool = 'fastp'
+   shortread_qc_tool = 'fastp'
-   shortread_clipmerge_skipadaptertrim = false
+   shortread_qc_skipadaptertrim = false
-   shortread_clipmerge_mergepairs = false
+   shortread_qc_mergepairs = false
-   shortread_clipmerge_excludeunmerged = false
+   shortread_qc_excludeunmerged = false
-   shortread_clipmerge_adapter1 = null
+   shortread_qc_adapter1 = null
-   shortread_clipmerge_adapter2 = null
+   shortread_qc_adapter2 = null
-   shortread_clipmerge_minlength = 15
+   shortread_qc_minlength = 15
-   perform_longread_clip = false
-   save_preprocessed_reads = false
+   perform_longread_qc = false
+   longread_qc_run_clip = false
+   longread_qc_run_filter = false
+   longread_qc_minlength = 1000
+   longread_qc_keepbppercent = 90
+   longread_qc_targetnbases = 500000000
+
+   save_preprocessed_reads = false

    // Complexity filtering
    perform_shortread_complexityfilter = false
@@ -185,6 +192,7 @@ profiles {
    }
}
+
// Load igenomes.config if required

if (!params.igenomes_ignore) {
    includeConfig 'conf/igenomes.config'
} else {
@@ -262,7 +262,7 @@
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_excludeunmerged": {
+       "shortread_qc_excludeunmerged": {
            "type": "boolean"
        },
        "run_malt": {
@@ -291,26 +291,26 @@
            "type": "boolean",
            "description": "Enable MetaPhlAn for taxonomic profiling"
        },
-       "shortread_clipmerge_tool": {
+       "shortread_qc_tool": {
            "type": "string",
            "default": "fastp",
            "enum": ["fastp", "adapterremoval"]
        },
-       "shortread_clipmerge_skipadaptertrim": {
+       "shortread_qc_skipadaptertrim": {
            "type": "boolean"
        },
-       "shortread_clipmerge_mergepairs": {
+       "shortread_qc_mergepairs": {
            "type": "boolean"
        },
-       "shortread_clipmerge_adapter1": {
+       "shortread_qc_adapter1": {
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_adapter2": {
+       "shortread_qc_adapter2": {
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_minlength": {
+       "shortread_qc_minlength": {
            "type": "integer",
            "default": 15
        },
@@ -347,10 +347,10 @@
        "save_runmerged_reads": {
            "type": "boolean"
        },
-       "perform_shortread_clipmerge": {
+       "perform_shortread_qc": {
            "type": "boolean"
        },
-       "perform_longread_clip": {
+       "perform_longread_qc": {
            "type": "boolean"
        },
        "perform_shortread_complexityfilter": {
@@ -397,6 +397,24 @@
            "type": "string",
            "default": "tsv",
            "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
+       },
+       "longread_qc_run_clip": {
+           "type": "boolean"
+       },
+       "longread_qc_run_filter": {
+           "type": "boolean"
+       },
+       "longread_qc_minlength": {
+           "type": "integer",
+           "default": 1000
+       },
+       "longread_qc_keepbppercent": {
+           "type": "integer",
+           "default": 90
+       },
+       "longread_qc_targetnbases": {
+           "type": "integer",
+           "default": 500000000
        }
    }
}
@@ -4,6 +4,7 @@

include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
+include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'

workflow LONGREAD_PREPROCESSING {
    take:
@@ -13,21 +14,43 @@ workflow LONGREAD_PREPROCESSING {
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()

-   PORECHOP ( reads )
-
-   ch_processed_reads = PORECHOP.out.reads
-       .map {
-           meta, reads ->
-               def meta_new = meta.clone()
-               meta_new['single_end'] = 1
-               [ meta_new, reads ]
-       }
-
-   FASTQC_PROCESSED ( PORECHOP.out.reads )
-   ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+   if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
+       PORECHOP ( reads )
+
+       ch_processed_reads = PORECHOP.out.reads
+           .map {
+               meta, reads ->
+                   def meta_new = meta.clone()
+                   meta_new['single_end'] = 1
+                   [ meta_new, reads ]
+           }
+       ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+
+   } else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
+
+       ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
+       ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+
+   } else {
+       PORECHOP ( reads )
+       ch_clipped_reads = PORECHOP.out.reads
+           .map {
+               meta, reads ->
+                   def meta_new = meta.clone()
+                   meta_new['single_end'] = 1
+                   [ meta_new, reads ]
+           }
+
+       ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
+
+       ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+       ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+
+   }
+
+   FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

    emit:
    reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions // channel: [ versions.yml ]
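The new control flow picks one of three paths from the two booleans: clip only (Porechop), filter only (Filtlong), or, in every other case (including both flags unset), Porechop followed by Filtlong. A standalone Groovy sketch of just that branch selection; illustrative only, since the real code operates on Nextflow channels:

```groovy
// Standalone sketch of the branch selection above (not pipeline code).
String longreadQcPath(boolean runClip, boolean runFilter) {
    if      ( runClip && !runFilter ) return 'porechop only'
    else if ( !runClip && runFilter ) return 'filtlong only'
    else                              return 'porechop then filtlong'
}

assert longreadQcPath(true,  false) == 'porechop only'
assert longreadQcPath(false, true ) == 'filtlong only'
assert longreadQcPath(true,  true ) == 'porechop then filtlong'
assert longreadQcPath(false, false) == 'porechop then filtlong' // the else branch also covers neither flag
```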
@@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
     * has to be exported in a separate channel and we must manually recombine when necessary.
     */

-   if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
+   if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {

        ch_concat_fastq = Channel.empty()
            .mix(

@@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
        ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
            .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)

-   } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
+   } else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {

        ch_concat_fastq = Channel.empty()
            .mix(
@@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP {

    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
    // Last parameter here turns on merging of PE data
-   FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
+   FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )

-   if ( params.shortread_clipmerge_mergepairs ) {
+   if ( params.shortread_qc_mergepairs ) {
        ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
            .map {
                meta, reads ->
@@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING {
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()

-   if ( params.shortread_clipmerge_tool == "fastp" ) {
+   if ( params.shortread_qc_tool == "fastp" ) {
        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
        ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
-   } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+   } else if ( params.shortread_qc_tool == "adapterremoval" ) {
        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
        ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
@@ -19,8 +19,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
// Check mandatory parameters
if (params.input    ) { ch_input     = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
-if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
-if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
+
+if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-short reads. Pairs will be profiled as separate files."
+if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
+
+if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) & !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_long_qc"

if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
@@ -110,14 +113,14 @@ workflow TAXPROFILER {
    /*
        SUBWORKFLOW: PERFORM PREPROCESSING
    */
-   if ( params.perform_shortread_clipmerge ) {
+   if ( params.perform_shortread_qc ) {
        ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
        ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
    } else {
        ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
    }

-   if ( params.perform_longread_clip ) {
+   if ( params.perform_longread_qc ) {
        ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
            .map { it -> [ it[0], [it[1]] ] }
        ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
@@ -211,11 +214,11 @@ workflow TAXPROFILER {
    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))

-   if (params.perform_shortread_clipmerge) {
+   if (params.perform_shortread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

-   if (params.perform_longread_clip) {
+   if (params.perform_longread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }
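The new guard in the parameter checks ties the long-read sub-flags to the master switch. A standalone Groovy sketch of the same check with hypothetical values; the committed guard uses a single `&`, the sketch below uses `&&`:

```groovy
// Standalone sketch (not pipeline code) of the long-read QC parameter guard.
def params = [ perform_longread_qc: false, longread_qc_run_clip: true, longread_qc_run_filter: false ]

if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) {
    System.err.println 'ERROR: --longread_qc_run_clip/--longread_qc_run_filter requested but --perform_longread_qc is not set'
}
```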