1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-26 06:19:55 +00:00

Merge pull request #36 from nf-core/complexity-filter-bbduk

Add bbduk and prinseq complexity (entropy or dust-based) short read filtering
This commit is contained in:
James A. Fellows Yates 2022-04-05 16:45:40 +02:00 committed by GitHub
commit e72be724a6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 416 additions and 63 deletions

View file

@ -29,8 +29,16 @@ jobs:
- NXF_VER: "" - NXF_VER: ""
NXF_EDGE: "1" NXF_EDGE: "1"
parameters: parameters:
- "--longread_clip false"
- "--shortread_clip false"
- "--shortread_clipmerge_tool fastp" - "--shortread_clipmerge_tool fastp"
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
- "--shortread_clipmerge_tool adapterremoval" - "--shortread_clipmerge_tool adapterremoval"
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
- "--shortread_complexityfilter_tool bbduk"
# Tool name must match the value tested in SHORTREAD_COMPLEXITYFILTERING ('prinseqplusplus');
# any other value falls through to the unfiltered pass-through branch.
- "--shortread_complexityfilter_tool prinseqplusplus"
steps: steps:
- name: Check out pipeline code - name: Check out pipeline code

View file

@ -18,21 +18,27 @@
- [fastp](https://doi.org/10.1093/bioinformatics/bty560) - [fastp](https://doi.org/10.1093/bioinformatics/bty560)
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560. > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2) - [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2. > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
- [Porechop](https://github.com/rrwick/Porechop) - [Porechop](https://github.com/rrwick/Porechop)
- [BBTools](http://sourceforge.net/projects/bbmap/)
- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6) - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)

View file

@ -132,7 +132,6 @@ process {
] ]
} }
withName: PORECHOP { withName: PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [ publishDir = [
@ -142,11 +141,30 @@ process {
] ]
} }
withName: CAT_FASTQ { withName: BBMAP_BBDUK {
ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}",
"entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
params.shortread_complexityfilter_bbduk_mask ? "entropymask=t" : "entropymask=f"
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [ publishDir = [
path: { "${params.outdir}/prepared_sequences" }, path: { "${params.outdir}/bbduk/" },
mode: 'copy', mode: params.publish_dir_mode,
pattern: '*.fastq.gz' pattern: '*.{fastq.gz,log}'
]
}
withName: PRINSEQPLUSPLUS {
ext.args = [
    // Low-complexity filter: DUST score when mode is 'dust', otherwise the shared entropy threshold
    params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
    // Quality-trimming options are all set to 0 (presumably to leave trimming to the clip/merge step);
    // the right-hand option was previously missing because '-trim_qual_left' was given twice
    "-trim_qual_left=0 -trim_qual_right=0 -trim_qual_window=0 -trim_qual_step=0"
].join(' ').trim()
ext.prefix = { "${meta.id}-${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/prinseqplusplus/" },
mode: params.publish_dir_mode,
pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}'
] ]
} }

View file

@ -28,5 +28,6 @@ params {
run_malt = true run_malt = true
run_metaphlan3 = true run_metaphlan3 = true
shortread_clipmerge = true shortread_clipmerge = true
longread_clip = false
shortread_complexityfilter = true
} }

View file

@ -6,6 +6,9 @@
"adapterremoval": { "adapterremoval": {
"git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a" "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
}, },
"bbmap/bbduk": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"cat/fastq": { "cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
}, },
@ -33,6 +36,9 @@
"porechop": { "porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
}, },
"prinseqplusplus": {
"git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
},
"untar": { "untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
} }

View file

@ -0,0 +1,43 @@
// Runs bbduk.sh on single- or paired-end reads, optionally against a contaminant
// reference, and captures the tool's console output in a per-sample log file.
process BBMAP_BBDUK {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
        'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"

    input:
    tuple val(meta), path(reads)
    path contaminants

    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads
    tuple val(meta), path('*.log')     , emit: log
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Input/output arguments differ between single-end (in/out) and paired-end (in1/in2, out1/out2)
    def raw      = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
    def trimmed  = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
    // The reference argument is only passed when a contaminants file was supplied
    def contaminants_fa = contaminants ? "ref=$contaminants" : ''
    // JVM heap limit taken from the task memory in megabytes, so it is valid whatever
    // unit the memory value prints in; omitted entirely when no memory is set for the task
    def max_heap = task.memory ? "-Xmx${task.memory.toMega()}m" : ''
    """
    bbduk.sh \\
        $max_heap \\
        $raw \\
        $trimmed \\
        threads=$task.cpus \\
        $args \\
        $contaminants_fa \\
        &> ${prefix}.bbduk.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,52 @@
name: bbmap_bbduk
description: Adapter and quality trimming of sequencing reads
keywords:
- trimming
- adapter trimming
- quality trimming
tools:
- bbmap:
description: BBMap is a short read aligner, as well as various other bioinformatic tools.
homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
tool_dev_url: None
doi: ""
licence: ["UC-LBL license (see package)"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
- contaminants:
type: file
description: |
Reference files containing adapter and/or contaminant sequences for sequence kmer matching
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: The trimmed/modified fastq reads
pattern: "*fastq.gz"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- log:
type: file
description: Bbduk log file
pattern: "*bbduk.log"
authors:
- "@MGordon09"

View file

@ -0,0 +1,61 @@
// Runs prinseq++ on single- or paired-end reads, writing gzipped outputs under the
// configured prefix and copying the tool's STDOUT into a log file.
process PRINSEQPLUSPLUS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
        'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*_good_out*.fastq.gz")  , emit: good_reads
    tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
    tuple val(meta), path("*_bad_out*.fastq.gz")   , optional: true, emit: bad_reads
    tuple val(meta), path("*.log")                 , emit: log
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def extra_args = task.ext.args ?: ''
    def out_prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end data is passed with -fastq only; paired-end adds the mate via -fastq2
    def input_flags = meta.single_end ? "-fastq ${reads}" : "-fastq ${reads[0]} -fastq2 ${reads[1]}"
    """
    prinseq++ \\
        -threads $task.cpus \\
        $input_flags \\
        -out_name ${out_prefix} \\
        -out_gz \\
        -VERBOSE 1 \\
        $extra_args \\
        | tee ${out_prefix}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,60 @@
name: "prinseqplusplus"
description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
keywords:
- fastq
- fasta
- filter
- trim
tools:
  - "prinseqplusplus":
      description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
      homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      doi: "10.7287/peerj.preprints.27553v1"
      # licence is a list of licence names, as in the other module meta files
      licence: ["GPL v2"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end
data, respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- good_reads:
type: file
description: Reads passing filter(s) in gzipped FASTQ format
pattern: "*_good_out_{R1,R2}.fastq.gz"
- single_reads:
type: file
description: |
Single reads without the pair passing filter(s) in gzipped FASTQ format
pattern: "*_single_out_{R1,R2}.fastq.gz"
- bad_reads:
type: file
description: |
Reads not passing filter(s) in gzipped FASTQ format
pattern: "*_bad_out_{R1,R2}.fastq.gz"
- log:
type: file
description: |
Verbose level 1 STDOUT information in a log file
pattern: "*.log"
authors:
- "@jfy133"

View file

@ -51,7 +51,7 @@ params {
max_cpus = 16 max_cpus = 16
max_time = '240.h' max_time = '240.h'
// Databaess // Databases
databases = null databases = null
// FASTQ preprocessing // FASTQ preprocessing
@ -65,6 +65,16 @@ params {
shortread_clipmerge_minlength = 15 shortread_clipmerge_minlength = 15
longread_clip = false longread_clip = false
// Complexity filtering
shortread_complexityfilter = false
shortread_complexityfilter_tool = 'bbduk'
shortread_complexityfilter_entropy = 0.3
shortread_complexityfilter_bbduk_windowsize = 50
shortread_complexityfilter_bbduk_mask = false
shortread_complexityfilter_prinseqplusplus_mode = 'entropy'
shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
// MALT // MALT
run_malt = false run_malt = false
malt_mode = 'BlastN' malt_mode = 'BlastN'

View file

@ -266,8 +266,7 @@
"type": "boolean" "type": "boolean"
}, },
"shortread_clipmerge_excludeunmerged": { "shortread_clipmerge_excludeunmerged": {
"type": "boolean", "type": "boolean"
"default": false
}, },
"longread_clip": { "longread_clip": {
"type": "boolean" "type": "boolean"
@ -308,6 +307,33 @@
"shortread_clipmerge_minlength": { "shortread_clipmerge_minlength": {
"type": "integer", "type": "integer",
"default": 15 "default": 15
},
"shortread_complexityfilter_tool": {
"type": "string",
"default": "bbduk"
},
"shortread_complexityfilter_bbduk_windowsize": {
"type": "integer",
"default": 50
},
"shortread_complexityfilter_bbduk_mask": {
"type": "boolean"
},
"shortread_complexityfilter": {
"type": "boolean"
},
"shortread_complexityfilter_entropy": {
"type": "number",
"default": 0.3
},
"shortread_complexityfilter_prinseqplusplus_mode": {
"type": "string",
"default": "entropy",
"enum": ["entropy", "dust"]
},
"shortread_complexityfilter_prinseqplusplus_dustscore": {
"type": "number",
"default": 0.5
} }
} }
} }

View file

@ -1,6 +1,6 @@
/* //
Process long raw reads with porechop // Process long raw reads with porechop
*/ //
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main' include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
@ -25,7 +25,7 @@ workflow LONGREAD_PREPROCESSING {
FASTQC_PROCESSED ( PORECHOP.out.reads ) FASTQC_PROCESSED ( PORECHOP.out.reads )
ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit: emit:

View file

@ -1,6 +1,6 @@
/* //
Process short raw reads with AdapterRemoval // Process short raw reads with AdapterRemoval
*/ //
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
@ -38,11 +38,17 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ADAPTERREMOVAL_PAIRED.out.singles_truncated, ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.paired_truncated ADAPTERREMOVAL_PAIRED.out.paired_truncated
) )
.map { meta, reads ->
def meta_new = meta.clone()
meta_new.single_end = true
[meta_new, reads]
}
.groupTuple() .groupTuple()
// Paired-end reads cause a nested tuple during grouping. // Paired-end reads cause a nested tuple during grouping.
// We want to present a flat list of files to `CAT_FASTQ`. // We want to present a flat list of files to `CAT_FASTQ`.
.map { meta, fastq -> [meta, fastq.flatten()] } .map { meta, fastq -> [meta, fastq.flatten()] }
CAT_FASTQ(ch_concat_fastq) CAT_FASTQ(ch_concat_fastq)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
@ -56,10 +62,13 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
) )
.map { meta, reads -> .map { meta, reads ->
meta.single_end = true def meta_new = meta.clone()
[meta, reads] meta_new.single_end = true
[meta_new, reads]
} }
.groupTuple() .groupTuple()
.map { meta, fastq -> [meta, fastq.flatten()] }
CAT_FASTQ(ch_concat_fastq) CAT_FASTQ(ch_concat_fastq)
@ -75,9 +84,10 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.settings.collect{it[1]}, ADAPTERREMOVAL_PAIRED.out.settings,
ADAPTERREMOVAL_SINGLE.out.settings.collect{it[1]} ADAPTERREMOVAL_SINGLE.out.settings
) )
emit: emit:

View file

@ -0,0 +1,32 @@
//
// Perform complexity filtering of short reads with BBDuk or PRINSEQ++
//
include { BBMAP_BBDUK } from '../../modules/nf-core/modules/bbmap/bbduk/main'
include { PRINSEQPLUSPLUS } from '../../modules/nf-core/modules/prinseqplusplus/main'
// Filters low-complexity short reads with the tool selected by
// params.shortread_complexityfilter_tool ('bbduk' or 'prinseqplusplus').
// Emits the filtered reads, the collected versions files, and files for MultiQC.
workflow SHORTREAD_COMPLEXITYFILTERING {
    take:
    reads // [ [ meta ], [ reads ] ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
        // The second input is the contaminants reference; an empty list means none is supplied
        ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
        ch_versions       = ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
        ch_multiqc_files  = ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
    } else if ( params.shortread_complexityfilter_tool == 'prinseqplusplus' ) {
        // Only the reads that passed filtering are carried forward
        ch_filtered_reads = PRINSEQPLUSPLUS ( reads ).good_reads
        ch_versions       = ch_versions.mix( PRINSEQPLUSPLUS.out.versions.first() )
    } else {
        // Unknown tool: keep the pass-through behaviour, but tell the user filtering did not run
        log.warn "[nf-core/taxprofiler] warning: unrecognised --shortread_complexityfilter_tool '${params.shortread_complexityfilter_tool}' (expected 'bbduk' or 'prinseqplusplus'). Complexity filtering will be skipped."
        ch_filtered_reads = reads
    }

    emit:
    reads    = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions       // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}

View file

@ -1,6 +1,6 @@
/* //
Process short raw reads with FastP // Process short raw reads with FastP
*/ //
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main' include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main' include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
@ -44,8 +44,8 @@ workflow SHORTREAD_FASTP {
ch_processed_reads = ch_fastp_reads_prepped ch_processed_reads = ch_fastp_reads_prepped
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json )
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json )
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -1,5 +1,5 @@
// //
// Check input samplesheet and get read channels // Perform read trimming and merging
// //
@ -9,7 +9,7 @@ include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules
workflow SHORTREAD_PREPROCESSING { workflow SHORTREAD_PREPROCESSING {
take: take:
reads // file: /path/to/samplesheet.csv reads // [ [ meta ], [ reads ] ]
main: main:
ch_versions = Channel.empty() ch_versions = Channel.empty()
@ -29,7 +29,7 @@ workflow SHORTREAD_PREPROCESSING {
FASTQC_PROCESSED ( ch_processed_reads ) FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit: emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -17,7 +17,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
// Check mandatory parameters // Check mandatory parameters
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
/* /*
@ -43,6 +43,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check' include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
/* /*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -90,7 +91,7 @@ workflow TAXPROFILER {
/* /*
MODULE: Run FastQC MODULE: Run FastQC
*/ */
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq") ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
FASTQC ( FASTQC (
ch_input_for_fastqc ch_input_for_fastqc
@ -98,10 +99,6 @@ workflow TAXPROFILER {
ch_versions = ch_versions.mix(FASTQC.out.versions.first()) ch_versions = ch_versions.mix(FASTQC.out.versions.first())
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
/* /*
SUBWORKFLOW: PERFORM PREPROCESSING SUBWORKFLOW: PERFORM PREPROCESSING
*/ */
@ -114,17 +111,26 @@ workflow TAXPROFILER {
if ( params.longread_clip ) { if ( params.longread_clip ) {
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
.map { it -> [ it[0], [it[1]] ] } .map { it -> [ it[0], [it[1]] ] }
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
} else { } else {
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
} }
/*
SUBWORKFLOW: COMPLEXITY FILTERING
*/
if ( params.shortread_complexityfilter ) {
ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
} else {
ch_shortreads_filtered = ch_shortreads_preprocessed
}
/* /*
COMBINE READS WITH POSSIBLE DATABASES COMBINE READS WITH POSSIBLE DATABASES
*/ */
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90] // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
ch_input_for_profiling = ch_shortreads_preprocessed ch_input_for_profiling = ch_shortreads_filtered
.mix( ch_longreads_preprocessed ) .mix( ch_longreads_preprocessed )
.combine(DB_CHECK.out.dbs) .combine(DB_CHECK.out.dbs)
.branch { .branch {
@ -165,7 +171,6 @@ workflow TAXPROFILER {
} }
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3 ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
.dump(tag: "input_metaphlan3")
.multiMap { .multiMap {
it -> it ->
reads: [it[0] + it[2], it[1]] reads: [it[0] + it[2], it[1]]
@ -190,6 +195,12 @@ workflow TAXPROFILER {
/* /*
MODULE: MultiQC MODULE: MultiQC
*/ */
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params) workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
ch_workflow_summary = Channel.value(workflow_summary) ch_workflow_summary = Channel.value(workflow_summary)
@ -201,21 +212,30 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
if (params.shortread_clipmerge) { if (params.shortread_clipmerge) {
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
} }
if (params.longread_clip) { if (params.longread_clip) {
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
} }
if (params.shortread_complexityfilter){
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
}
if (params.run_kraken2) { if (params.run_kraken2) {
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
} }
if (params.run_malt) { if (params.run_malt) {
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
} }
// TODO MALT results overwriting per database?
// TODO Versions for Kraken/MALT not reported? // TODO Versions for Kraken/MALT not reported?
// TODO create multiQC module for metaphlan // TODO create multiQC module for metaphlan
MULTIQC ( MULTIQC (