mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-10 23:33:09 +00:00
Merge branch 'dev' of github.com:nf-core/taxprofiler into dev
This commit is contained in:
commit
6c6f5e0913
17 changed files with 237 additions and 79 deletions
18
.github/workflows/ci.yml
vendored
18
.github/workflows/ci.yml
vendored
|
@ -29,18 +29,18 @@ jobs:
|
|||
- NXF_VER: ""
|
||||
NXF_EDGE: "1"
|
||||
parameters:
|
||||
- "--perform_longread_clip false"
|
||||
- "--perform_shortread_clipmerge false"
|
||||
- "--shortread_clipmerge_tool fastp"
|
||||
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
||||
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
|
||||
- "--shortread_clipmerge_tool adapterremoval"
|
||||
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
||||
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
|
||||
- "--perform_longread_qc false"
|
||||
- "--perform_shortread_qc false"
|
||||
- "--shortread_qc_tool fastp"
|
||||
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
|
||||
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
|
||||
- "--shortread_qc_tool adapterremoval"
|
||||
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
|
||||
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
|
||||
- "--shortread_complexityfilter_tool bbduk"
|
||||
- "--shortread_complexityfilter_tool prinseqplusplus"
|
||||
- "--perform_runmerging"
|
||||
- "--perform_runmerging --shortread_clipmerge_mergepairs"
|
||||
- "--perform_runmerging --shortread_qc_mergepairs"
|
||||
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
|
||||
|
||||
steps:
|
||||
|
|
|
@ -56,6 +56,8 @@
|
|||
|
||||
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
|
||||
|
||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
- [Anaconda](https://anaconda.com)
|
||||
|
|
|
@ -51,10 +51,10 @@ process {
|
|||
withName: FASTP_SINGLE {
|
||||
ext.args = [
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_clipmerge_minlength}",
|
||||
"--length_required ${params.shortread_qc_minlength}",
|
||||
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
|
@ -69,13 +69,13 @@ process {
|
|||
withName: FASTP_PAIRED {
|
||||
ext.args = [
|
||||
// collapsing options - option to retain singletons
|
||||
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
||||
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
|
||||
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_clipmerge_minlength}",
|
||||
"--length_required ${params.shortread_qc_minlength}",
|
||||
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
|
@ -90,10 +90,10 @@ process {
|
|||
withName: ADAPTERREMOVAL_SINGLE {
|
||||
ext.args = [
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||
// filtering options
|
||||
"--minlength ${params.shortread_clipmerge_minlength}"
|
||||
"--minlength ${params.shortread_qc_minlength}"
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
|
@ -107,13 +107,13 @@ process {
|
|||
withName: ADAPTERREMOVAL_PAIRED {
|
||||
ext.args = [
|
||||
// collapsing options
|
||||
params.shortread_clipmerge_mergepairs ? "--collapse" : "",
|
||||
params.shortread_qc_mergepairs ? "--collapse" : "",
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
||||
params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
|
||||
// filtering options
|
||||
"--minlength ${params.shortread_clipmerge_minlength}"
|
||||
"--minlength ${params.shortread_qc_minlength}"
|
||||
].join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
|
@ -134,6 +134,22 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: FILTLONG {
|
||||
ext.args = [
|
||||
"--min_length ${params.longread_qc_minlength}",
|
||||
"--keep_percent ${params.longread_qc_keep_percent}",
|
||||
"--target_bases ${params.longread_qc_target_bases}"
|
||||
]
|
||||
.join(' ').trim()
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/filtlong" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.fastq.gz',
|
||||
enabled: params.save_preprocessed_reads
|
||||
]
|
||||
}
|
||||
|
||||
withName: BOWTIE2_BUILD {
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/bowtie2/build" },
|
||||
|
|
|
@ -24,8 +24,8 @@ params {
|
|||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||
perform_shortread_clipmerge = true
|
||||
perform_longread_clip = false
|
||||
perform_shortread_qc = true
|
||||
perform_longread_qc = true
|
||||
perform_shortread_complexityfilter = true
|
||||
perform_shortread_hostremoval = true
|
||||
perform_longread_hostremoval = true
|
||||
|
|
|
@ -24,8 +24,8 @@ params {
|
|||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||
perform_shortread_clipmerge = false
|
||||
perform_longread_clip = false
|
||||
perform_shortread_qc = false
|
||||
perform_longread_qc = false
|
||||
perform_shortread_complexityfilter = false
|
||||
perform_shortread_hostremoval = false
|
||||
perform_longread_hostremoval = false
|
||||
|
|
|
@ -24,8 +24,8 @@ params {
|
|||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||
perform_shortread_clipmerge = true
|
||||
perform_longread_clip = true
|
||||
perform_shortread_qc = true
|
||||
perform_longread_qc = true
|
||||
perform_shortread_complexityfilter = true
|
||||
perform_shortread_hostremoval = true
|
||||
perform_longread_hostremoval = true
|
||||
|
|
|
@ -164,16 +164,16 @@ nf-core/taxprofiler offers four main preprocessing steps
|
|||
|
||||
#### Read Processing
|
||||
|
||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags.
|
||||
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
|
||||
|
||||
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
|
||||
|
||||
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
|
||||
|
||||
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`)
|
||||
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`).
|
||||
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`).
|
||||
Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
|
||||
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`).
|
||||
By default, paired-end merging is not activated, and paired-end profiling is performed where supported; otherwise, pairs will be profiled independently. If paired-end merging is activated, you can also specify whether to exclude unmerged reads from the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
|
||||
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
|
||||
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
|
||||
|
||||
There is currently one option for long-read Oxford Nanopore processing: `porechop`.
|
||||
|
||||
|
|
|
@ -36,6 +36,9 @@
|
|||
"fastqc": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"filtlong": {
|
||||
"git_sha": "089f761f0bf79c4a486f1df9b6205f650196a2c1"
|
||||
},
|
||||
"kaiju/kaiju": {
|
||||
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
|
||||
},
|
||||
|
|
37
modules/nf-core/modules/filtlong/main.nf
generated
Normal file
37
modules/nf-core/modules/filtlong/main.nf
generated
Normal file
|
@ -0,0 +1,37 @@
|
|||
process FILTLONG {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
|
||||
'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(shortreads), path(longreads)
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*.fastq.gz"), emit: reads
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
|
||||
if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!"
|
||||
"""
|
||||
filtlong \\
|
||||
$short_reads \\
|
||||
$args \\
|
||||
$longreads \\
|
||||
| gzip -n > ${prefix}.fastq.gz
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
50
modules/nf-core/modules/filtlong/meta.yml
generated
Normal file
50
modules/nf-core/modules/filtlong/meta.yml
generated
Normal file
|
@ -0,0 +1,50 @@
|
|||
name: filtlong
|
||||
description: Filtlong filters long reads based on quality measures or short read data.
|
||||
keywords:
|
||||
- nanopore
|
||||
- quality control
|
||||
- QC
|
||||
- filtering
|
||||
- long reads
|
||||
- short reads
|
||||
tools:
|
||||
- filtlong:
|
||||
description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
|
||||
homepage: https://anaconda.org/bioconda/filtlong
|
||||
documentation: None
|
||||
tool_dev_url: https://github.com/rrwick/Filtlong
|
||||
doi: ""
|
||||
licence: ["GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- shortreads:
|
||||
type: file
|
||||
description: fastq file
|
||||
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||
- longreads:
|
||||
type: file
|
||||
description: fastq file
|
||||
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- reads:
|
||||
type: file
|
||||
description: Filtered (compressed) fastq file
|
||||
pattern: "*.fastq.gz"
|
||||
|
||||
authors:
|
||||
- "@d4straub"
|
|
@ -55,16 +55,23 @@ params {
|
|||
databases = null
|
||||
|
||||
// FASTQ preprocessing
|
||||
perform_shortread_clipmerge = false
|
||||
shortread_clipmerge_tool = 'fastp'
|
||||
shortread_clipmerge_skipadaptertrim = false
|
||||
shortread_clipmerge_mergepairs = false
|
||||
shortread_clipmerge_excludeunmerged = false
|
||||
shortread_clipmerge_adapter1 = null
|
||||
shortread_clipmerge_adapter2 = null
|
||||
shortread_clipmerge_minlength = 15
|
||||
perform_longread_clip = false
|
||||
save_preprocessed_reads = false
|
||||
perform_shortread_qc = false
|
||||
shortread_qc_tool = 'fastp'
|
||||
shortread_qc_skipadaptertrim = false
|
||||
shortread_qc_mergepairs = false
|
||||
shortread_qc_excludeunmerged = false
|
||||
shortread_qc_adapter1 = null
|
||||
shortread_qc_adapter2 = null
|
||||
shortread_qc_minlength = 15
|
||||
|
||||
perform_longread_qc = false
|
||||
longread_qc_run_clip = false
|
||||
longread_qc_run_filter = false
|
||||
longread_qc_minlength = 1000
|
||||
longread_qc_keep_percent = 90
|
||||
longread_qc_target_bases = 500000000
|
||||
|
||||
save_preprocessed_reads = false
|
||||
|
||||
// Complexity filtering
|
||||
perform_shortread_complexityfilter = false
|
||||
|
@ -191,6 +198,7 @@ profiles {
|
|||
}
|
||||
|
||||
// Load igenomes.config if required
|
||||
|
||||
if (!params.igenomes_ignore) {
|
||||
includeConfig 'conf/igenomes.config'
|
||||
} else {
|
||||
|
|
|
@ -262,7 +262,7 @@
|
|||
"type": "string",
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_clipmerge_excludeunmerged": {
|
||||
"shortread_qc_excludeunmerged": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"run_malt": {
|
||||
|
@ -291,26 +291,26 @@
|
|||
"type": "boolean",
|
||||
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||
},
|
||||
"shortread_clipmerge_tool": {
|
||||
"shortread_qc_tool": {
|
||||
"type": "string",
|
||||
"default": "fastp",
|
||||
"enum": ["fastp", "adapterremoval"]
|
||||
},
|
||||
"shortread_clipmerge_skipadaptertrim": {
|
||||
"shortread_qc_skipadaptertrim": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"shortread_clipmerge_mergepairs": {
|
||||
"shortread_qc_mergepairs": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"shortread_clipmerge_adapter1": {
|
||||
"shortread_qc_adapter1": {
|
||||
"type": "string",
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_clipmerge_adapter2": {
|
||||
"shortread_qc_adapter2": {
|
||||
"type": "string",
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_clipmerge_minlength": {
|
||||
"shortread_qc_minlength": {
|
||||
"type": "integer",
|
||||
"default": 15
|
||||
},
|
||||
|
@ -348,10 +348,10 @@
|
|||
"save_runmerged_reads": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"perform_shortread_clipmerge": {
|
||||
"perform_shortread_qc": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"perform_longread_clip": {
|
||||
"perform_longread_qc": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"perform_shortread_complexityfilter": {
|
||||
|
@ -409,6 +409,24 @@
|
|||
"shortread_complexityfilter_fastp_threshold": {
|
||||
"type": "integer",
|
||||
"default": 30
|
||||
},
|
||||
"longread_qc_run_clip": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"longread_qc_run_filter": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"longread_qc_minlength": {
|
||||
"type": "integer",
|
||||
"default": 1000
|
||||
},
|
||||
"longread_qc_keep_percent": {
|
||||
"type": "integer",
|
||||
"default": 90
|
||||
},
|
||||
"longread_qc_target_bases": {
|
||||
"type": "integer",
|
||||
"default": 500000000
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
||||
include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'
|
||||
|
||||
workflow LONGREAD_PREPROCESSING {
|
||||
take:
|
||||
|
@ -13,21 +14,43 @@ workflow LONGREAD_PREPROCESSING {
|
|||
ch_versions = Channel.empty()
|
||||
ch_multiqc_files = Channel.empty()
|
||||
|
||||
PORECHOP ( reads )
|
||||
if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
|
||||
PORECHOP ( reads )
|
||||
|
||||
ch_processed_reads = PORECHOP.out.reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
ch_processed_reads = PORECHOP.out.reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
|
||||
FASTQC_PROCESSED ( PORECHOP.out.reads )
|
||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||
}
|
||||
} else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
|
||||
|
||||
// FILTLONG emits both `reads` and `versions`; select the `reads` output so that
// ch_processed_reads carries [ meta, reads ] tuples like the other branches do.
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} ).reads
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
|
||||
} else {
|
||||
PORECHOP ( reads )
|
||||
ch_clipped_reads = PORECHOP.out.reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
|
||||
ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
|
||||
|
||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
|
||||
}
|
||||
|
||||
FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
|
|
|
@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
|||
* has to be exported in a separate channel and we must manually recombine when necessary.
|
||||
*/
|
||||
|
||||
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
|
||||
if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {
|
||||
|
||||
ch_concat_fastq = Channel.empty()
|
||||
.mix(
|
||||
|
@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
|||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||
|
||||
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
|
||||
} else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {
|
||||
|
||||
ch_concat_fastq = Channel.empty()
|
||||
.mix(
|
||||
|
|
|
@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP {
|
|||
|
||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||
// Last parameter here turns on merging of PE data
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
|
||||
|
||||
if ( params.shortread_clipmerge_mergepairs ) {
|
||||
if ( params.shortread_qc_mergepairs ) {
|
||||
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
|
||||
.map {
|
||||
meta, reads ->
|
||||
|
|
|
@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING {
|
|||
ch_versions = Channel.empty()
|
||||
ch_multiqc_files = Channel.empty()
|
||||
|
||||
if ( params.shortread_clipmerge_tool == "fastp" ) {
|
||||
if ( params.shortread_qc_tool == "fastp" ) {
|
||||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
|
||||
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
|
||||
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||
|
|
|
@ -20,10 +20,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
|||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||
|
||||
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
||||
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
||||
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
||||
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
|
||||
// The long-read clip/filter switches are only consulted when --perform_longread_qc is set,
// so abort early if either is requested without it. The hint must name the real parameter.
if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_longread_qc"
|
||||
|
||||
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_clipmerge == false || params.shortread_clipmerge_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_clipmerge and/or --shortread_clipmerge_tool 'fastp'"
|
||||
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"
|
||||
|
||||
// Short-read host removal needs a reference FASTA; the message names the flag that is actually checked.
if (params.perform_shortread_hostremoval && !params.hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --hostremoval_reference FASTA supplied. Check input." }
|
||||
// An index without its reference FASTA is rejected; the message names the parameter that is actually checked.
if (!params.hostremoval_reference && params.hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --hostremoval_reference_index provided but no --hostremoval_reference FASTA supplied. Check input." }
|
||||
|
@ -116,14 +117,14 @@ workflow TAXPROFILER {
|
|||
/*
|
||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||
*/
|
||||
if ( params.perform_shortread_clipmerge ) {
|
||||
if ( params.perform_shortread_qc ) {
|
||||
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
|
||||
} else {
|
||||
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
|
||||
}
|
||||
|
||||
if ( params.perform_longread_clip ) {
|
||||
if ( params.perform_longread_qc ) {
|
||||
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
||||
.map { it -> [ it[0], [it[1]] ] }
|
||||
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
|
||||
|
@ -227,11 +228,11 @@ workflow TAXPROFILER {
|
|||
|
||||
ch_multiqc_files = ch_multiqc_files.mix(ch_taxprofiler_logo.ifEmpty([]))
|
||||
|
||||
if (params.perform_shortread_clipmerge) {
|
||||
if (params.perform_shortread_qc) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
}
|
||||
|
||||
if (params.perform_longread_clip) {
|
||||
if (params.perform_longread_qc) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue