mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 09:59:55 +00:00
Merge pull request #74 from nf-core/add-filtlong
Add initial longread QC filtering - requires filtlong module update
This commit is contained in:
commit
43ea8d299c
17 changed files with 237 additions and 79 deletions
18
.github/workflows/ci.yml
vendored
18
.github/workflows/ci.yml
vendored
|
@ -29,18 +29,18 @@ jobs:
|
||||||
- NXF_VER: ""
|
- NXF_VER: ""
|
||||||
NXF_EDGE: "1"
|
NXF_EDGE: "1"
|
||||||
parameters:
|
parameters:
|
||||||
- "--perform_longread_clip false"
|
- "--perform_longread_qc false"
|
||||||
- "--perform_shortread_clipmerge false"
|
- "--perform_shortread_qc false"
|
||||||
- "--shortread_clipmerge_tool fastp"
|
- "--shortread_qc_tool fastp"
|
||||||
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
|
||||||
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
|
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
|
||||||
- "--shortread_clipmerge_tool adapterremoval"
|
- "--shortread_qc_tool adapterremoval"
|
||||||
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
|
||||||
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
|
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
|
||||||
- "--shortread_complexityfilter_tool bbduk"
|
- "--shortread_complexityfilter_tool bbduk"
|
||||||
- "--shortread_complexityfilter_tool prinseqplusplus"
|
- "--shortread_complexityfilter_tool prinseqplusplus"
|
||||||
- "--perform_runmerging"
|
- "--perform_runmerging"
|
||||||
- "--perform_runmerging --shortread_clipmerge_mergepairs"
|
- "--perform_runmerging --shortread_qc_mergepairs"
|
||||||
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
|
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -56,6 +56,8 @@
|
||||||
|
|
||||||
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
|
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
|
||||||
|
|
||||||
|
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||||
|
|
||||||
## Software packaging/containerisation tools
|
## Software packaging/containerisation tools
|
||||||
|
|
||||||
- [Anaconda](https://anaconda.com)
|
- [Anaconda](https://anaconda.com)
|
||||||
|
|
|
@ -51,10 +51,10 @@ process {
|
||||||
withName: FASTP_SINGLE {
|
withName: FASTP_SINGLE {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
// trimming options
|
// trimming options
|
||||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||||
// filtering options
|
// filtering options
|
||||||
"--length_required ${params.shortread_clipmerge_minlength}",
|
"--length_required ${params.shortread_qc_minlength}",
|
||||||
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||||
].join(' ').trim()
|
].join(' ').trim()
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
|
@ -69,13 +69,13 @@ process {
|
||||||
withName: FASTP_PAIRED {
|
withName: FASTP_PAIRED {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
// collapsing options - option to retain singletons
|
// collapsing options - option to retain singletons
|
||||||
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
|
||||||
// trimming options
|
// trimming options
|
||||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||||
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
|
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
|
||||||
// filtering options
|
// filtering options
|
||||||
"--length_required ${params.shortread_clipmerge_minlength}",
|
"--length_required ${params.shortread_qc_minlength}",
|
||||||
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||||
].join(' ').trim()
|
].join(' ').trim()
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
|
@ -90,10 +90,10 @@ process {
|
||||||
withName: ADAPTERREMOVAL_SINGLE {
|
withName: ADAPTERREMOVAL_SINGLE {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
// trimming options
|
// trimming options
|
||||||
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||||
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||||
// filtering options
|
// filtering options
|
||||||
"--minlength ${params.shortread_clipmerge_minlength}"
|
"--minlength ${params.shortread_qc_minlength}"
|
||||||
].join(' ').trim()
|
].join(' ').trim()
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
|
@ -107,13 +107,13 @@ process {
|
||||||
withName: ADAPTERREMOVAL_PAIRED {
|
withName: ADAPTERREMOVAL_PAIRED {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
// collapsing options
|
// collapsing options
|
||||||
params.shortread_clipmerge_mergepairs ? "--collapse" : "",
|
params.shortread_qc_mergepairs ? "--collapse" : "",
|
||||||
// trimming options
|
// trimming options
|
||||||
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||||
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||||
params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
|
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
|
||||||
// filtering options
|
// filtering options
|
||||||
"--minlength ${params.shortread_clipmerge_minlength}"
|
"--minlength ${params.shortread_qc_minlength}"
|
||||||
].join(' ').trim()
|
].join(' ').trim()
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
|
@ -134,6 +134,22 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: FILTLONG {
|
||||||
|
ext.args = [
|
||||||
|
"--min_length ${params.longread_qc_minlength}",
|
||||||
|
"--keep_percent ${params.longread_qc_keep_percent}",
|
||||||
|
"--target_bases ${params.longread_qc_target_bases}"
|
||||||
|
]
|
||||||
|
.join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/filtlong" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
withName: BOWTIE2_BUILD {
|
withName: BOWTIE2_BUILD {
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/bowtie2/build" },
|
path: { "${params.outdir}/bowtie2/build" },
|
||||||
|
|
|
@ -24,8 +24,8 @@ params {
|
||||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||||
perform_shortread_clipmerge = true
|
perform_shortread_qc = true
|
||||||
perform_longread_clip = false
|
perform_longread_qc = true
|
||||||
perform_shortread_complexityfilter = true
|
perform_shortread_complexityfilter = true
|
||||||
perform_shortread_hostremoval = true
|
perform_shortread_hostremoval = true
|
||||||
perform_longread_hostremoval = true
|
perform_longread_hostremoval = true
|
||||||
|
|
|
@ -24,8 +24,8 @@ params {
|
||||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||||
perform_shortread_clipmerge = false
|
perform_shortread_qc = false
|
||||||
perform_longread_clip = false
|
perform_longread_qc = false
|
||||||
perform_shortread_complexityfilter = false
|
perform_shortread_complexityfilter = false
|
||||||
perform_shortread_hostremoval = false
|
perform_shortread_hostremoval = false
|
||||||
perform_longread_hostremoval = false
|
perform_longread_hostremoval = false
|
||||||
|
|
|
@ -24,8 +24,8 @@ params {
|
||||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||||
perform_shortread_clipmerge = true
|
perform_shortread_qc = true
|
||||||
perform_longread_clip = true
|
perform_longread_qc = true
|
||||||
perform_shortread_complexityfilter = true
|
perform_shortread_complexityfilter = true
|
||||||
perform_shortread_hostremoval = true
|
perform_shortread_hostremoval = true
|
||||||
perform_longread_hostremoval = true
|
perform_longread_hostremoval = true
|
||||||
|
|
|
@ -162,16 +162,16 @@ nf-core/taxprofiler offers four main preprocessing steps
|
||||||
|
|
||||||
#### Read Processing
|
#### Read Processing
|
||||||
|
|
||||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags.
|
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
||||||
|
|
||||||
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||||
|
|
||||||
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
||||||
|
|
||||||
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`)
|
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`).
|
||||||
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`).
|
By default, paired-end merging is not activated and paired-end profiling is performed where supported; otherwise, pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
|
||||||
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`).
|
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
|
||||||
Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
|
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
|
||||||
|
|
||||||
There is currently one option for long-read Oxford Nanopore processing: `porechop`.
|
There is currently one option for long-read Oxford Nanopore processing: `porechop`.
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,9 @@
|
||||||
"fastqc": {
|
"fastqc": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
"filtlong": {
|
||||||
|
"git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1"
|
||||||
|
},
|
||||||
"kaiju/kaiju": {
|
"kaiju/kaiju": {
|
||||||
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
|
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
|
||||||
},
|
},
|
||||||
|
|
37
modules/nf-core/modules/filtlong/main.nf
generated
Normal file
37
modules/nf-core/modules/filtlong/main.nf
generated
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
process FILTLONG {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_low'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
|
||||||
|
'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
tuple val(meta), path(shortreads), path(longreads)
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path("*.fastq.gz"), emit: reads
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
|
||||||
|
if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!"
|
||||||
|
"""
|
||||||
|
filtlong \\
|
||||||
|
$short_reads \\
|
||||||
|
$args \\
|
||||||
|
$longreads \\
|
||||||
|
| gzip -n > ${prefix}.fastq.gz
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
50
modules/nf-core/modules/filtlong/meta.yml
generated
Normal file
50
modules/nf-core/modules/filtlong/meta.yml
generated
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
name: filtlong
|
||||||
|
description: Filtlong filters long reads based on quality measures or short read data.
|
||||||
|
keywords:
|
||||||
|
- nanopore
|
||||||
|
- quality control
|
||||||
|
- QC
|
||||||
|
- filtering
|
||||||
|
- long reads
|
||||||
|
- short reads
|
||||||
|
tools:
|
||||||
|
- filtlong:
|
||||||
|
description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
|
||||||
|
homepage: https://anaconda.org/bioconda/filtlong
|
||||||
|
documentation: None
|
||||||
|
tool_dev_url: https://github.com/rrwick/Filtlong
|
||||||
|
doi: ""
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- shortreads:
|
||||||
|
type: file
|
||||||
|
description: fastq file
|
||||||
|
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||||
|
- longreads:
|
||||||
|
type: file
|
||||||
|
description: fastq file
|
||||||
|
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: Filtered (compressed) fastq file
|
||||||
|
pattern: "*.fastq.gz"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@d4straub"
|
|
@ -55,16 +55,23 @@ params {
|
||||||
databases = null
|
databases = null
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
perform_shortread_clipmerge = false
|
perform_shortread_qc = false
|
||||||
shortread_clipmerge_tool = 'fastp'
|
shortread_qc_tool = 'fastp'
|
||||||
shortread_clipmerge_skipadaptertrim = false
|
shortread_qc_skipadaptertrim = false
|
||||||
shortread_clipmerge_mergepairs = false
|
shortread_qc_mergepairs = false
|
||||||
shortread_clipmerge_excludeunmerged = false
|
shortread_qc_excludeunmerged = false
|
||||||
shortread_clipmerge_adapter1 = null
|
shortread_qc_adapter1 = null
|
||||||
shortread_clipmerge_adapter2 = null
|
shortread_qc_adapter2 = null
|
||||||
shortread_clipmerge_minlength = 15
|
shortread_qc_minlength = 15
|
||||||
perform_longread_clip = false
|
|
||||||
save_preprocessed_reads = false
|
perform_longread_qc = false
|
||||||
|
longread_qc_run_clip = false
|
||||||
|
longread_qc_run_filter = false
|
||||||
|
longread_qc_minlength = 1000
|
||||||
|
longread_qc_keep_percent = 90
|
||||||
|
longread_qc_target_bases = 500000000
|
||||||
|
|
||||||
|
save_preprocessed_reads = false
|
||||||
|
|
||||||
// Complexity filtering
|
// Complexity filtering
|
||||||
perform_shortread_complexityfilter = false
|
perform_shortread_complexityfilter = false
|
||||||
|
@ -191,6 +198,7 @@ profiles {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load igenomes.config if required
|
// Load igenomes.config if required
|
||||||
|
|
||||||
if (!params.igenomes_ignore) {
|
if (!params.igenomes_ignore) {
|
||||||
includeConfig 'conf/igenomes.config'
|
includeConfig 'conf/igenomes.config'
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -262,7 +262,7 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "None"
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_excludeunmerged": {
|
"shortread_qc_excludeunmerged": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"run_malt": {
|
"run_malt": {
|
||||||
|
@ -291,26 +291,26 @@
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Enable MetaPhlAn for taxonomic profiling"
|
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_tool": {
|
"shortread_qc_tool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "fastp",
|
"default": "fastp",
|
||||||
"enum": ["fastp", "adapterremoval"]
|
"enum": ["fastp", "adapterremoval"]
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_skipadaptertrim": {
|
"shortread_qc_skipadaptertrim": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_mergepairs": {
|
"shortread_qc_mergepairs": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_adapter1": {
|
"shortread_qc_adapter1": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "None"
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_adapter2": {
|
"shortread_qc_adapter2": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "None"
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_minlength": {
|
"shortread_qc_minlength": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 15
|
"default": 15
|
||||||
},
|
},
|
||||||
|
@ -348,10 +348,10 @@
|
||||||
"save_runmerged_reads": {
|
"save_runmerged_reads": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"perform_shortread_clipmerge": {
|
"perform_shortread_qc": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"perform_longread_clip": {
|
"perform_longread_qc": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"perform_shortread_complexityfilter": {
|
"perform_shortread_complexityfilter": {
|
||||||
|
@ -409,6 +409,24 @@
|
||||||
"shortread_complexityfilter_fastp_threshold": {
|
"shortread_complexityfilter_fastp_threshold": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 30
|
"default": 30
|
||||||
|
},
|
||||||
|
"longread_qc_run_clip": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"longread_qc_run_filter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"longread_qc_minlength": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 1000
|
||||||
|
},
|
||||||
|
"longread_qc_keep_percent": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 90
|
||||||
|
},
|
||||||
|
"longread_qc_target_bases": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 500000000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||||
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
||||||
|
include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'
|
||||||
|
|
||||||
workflow LONGREAD_PREPROCESSING {
|
workflow LONGREAD_PREPROCESSING {
|
||||||
take:
|
take:
|
||||||
|
@ -13,21 +14,43 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
ch_multiqc_files = Channel.empty()
|
ch_multiqc_files = Channel.empty()
|
||||||
|
|
||||||
PORECHOP ( reads )
|
if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
|
||||||
|
PORECHOP ( reads )
|
||||||
|
|
||||||
ch_processed_reads = PORECHOP.out.reads
|
ch_processed_reads = PORECHOP.out.reads
|
||||||
.map {
|
.map {
|
||||||
meta, reads ->
|
meta, reads ->
|
||||||
def meta_new = meta.clone()
|
def meta_new = meta.clone()
|
||||||
meta_new['single_end'] = 1
|
meta_new['single_end'] = 1
|
||||||
[ meta_new, reads ]
|
[ meta_new, reads ]
|
||||||
}
|
|
||||||
|
|
||||||
FASTQC_PROCESSED ( PORECHOP.out.reads )
|
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
}
|
||||||
|
} else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
|
||||||
|
|
||||||
|
// FILTLONG expects [ meta, shortreads, longreads ]; no short reads are supplied here, so an empty list is passed.
// The process emits both `reads` and `versions`, so select the `reads` output explicitly.
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} ).reads
|
||||||
|
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||||
|
|
||||||
|
} else {
|
||||||
|
PORECHOP ( reads )
|
||||||
|
ch_clipped_reads = PORECHOP.out.reads
|
||||||
|
.map {
|
||||||
|
meta, reads ->
|
||||||
|
def meta_new = meta.clone()
|
||||||
|
meta_new['single_end'] = 1
|
||||||
|
[ meta_new, reads ]
|
||||||
|
}
|
||||||
|
|
||||||
|
ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
|
||||||
|
|
||||||
|
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||||
|
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
versions = ch_versions // channel: [ versions.yml ]
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
|
|
@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
||||||
* has to be exported in a separate channel and we must manually recombine when necessary.
|
* has to be exported in a separate channel and we must manually recombine when necessary.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
|
if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {
|
||||||
|
|
||||||
ch_concat_fastq = Channel.empty()
|
ch_concat_fastq = Channel.empty()
|
||||||
.mix(
|
.mix(
|
||||||
|
@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
||||||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||||
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||||
|
|
||||||
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
|
} else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {
|
||||||
|
|
||||||
ch_concat_fastq = Channel.empty()
|
ch_concat_fastq = Channel.empty()
|
||||||
.mix(
|
.mix(
|
||||||
|
|
|
@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP {
|
||||||
|
|
||||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||||
// Last parameter here turns on merging of PE data
|
// Last parameter here turns on merging of PE data
|
||||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
|
||||||
|
|
||||||
if ( params.shortread_clipmerge_mergepairs ) {
|
if ( params.shortread_qc_mergepairs ) {
|
||||||
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
|
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
|
||||||
.map {
|
.map {
|
||||||
meta, reads ->
|
meta, reads ->
|
||||||
|
|
|
@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING {
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
ch_multiqc_files = Channel.empty()
|
ch_multiqc_files = Channel.empty()
|
||||||
|
|
||||||
if ( params.shortread_clipmerge_tool == "fastp" ) {
|
if ( params.shortread_qc_tool == "fastp" ) {
|
||||||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||||
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
|
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
|
||||||
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||||
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||||
|
|
|
@ -20,10 +20,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
||||||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||||
|
|
||||||
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
// Warn when MALT is requested but read pairs are NOT being merged: the message below
// describes what happens to uncollapsed (unmerged) pairs, so it must fire when merging is off.
if (!params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
||||||
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
|
||||||
|
// Abort early if a long-read QC sub-step (clipping or filtering) is requested while
// long-read QC as a whole is switched off. Uses short-circuit && like the sibling checks,
// and points the user at the parameter that actually exists (--perform_longread_qc).
if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_longread_qc"
|
||||||
|
|
||||||
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_clipmerge == false || params.shortread_clipmerge_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_clipmerge and/or --shortread_clipmerge_tool 'fastp'"
|
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"
|
||||||
|
|
||||||
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
|
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
|
||||||
if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }
|
if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --hostremoval_reference FASTA supplied. Check input." }
|
||||||
|
@ -116,14 +117,14 @@ workflow TAXPROFILER {
|
||||||
/*
|
/*
|
||||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||||
*/
|
*/
|
||||||
if ( params.perform_shortread_clipmerge ) {
|
if ( params.perform_shortread_qc ) {
|
||||||
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
||||||
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
|
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
|
||||||
} else {
|
} else {
|
||||||
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
|
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.perform_longread_clip ) {
|
if ( params.perform_longread_qc ) {
|
||||||
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
||||||
.map { it -> [ it[0], [it[1]] ] }
|
.map { it -> [ it[0], [it[1]] ] }
|
||||||
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
|
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
|
||||||
|
@ -227,11 +228,11 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([]))
|
ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([]))
|
||||||
|
|
||||||
if (params.perform_shortread_clipmerge) {
|
if (params.perform_shortread_qc) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.perform_longread_clip) {
|
if (params.perform_longread_qc) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue