
Check if PR works with centrifuge/centrifuge module

commit 606e5da7d5
Author: sofstam
Date:   2022-04-07 16:13:17 +02:00

25 changed files with 763 additions and 179 deletions

View file

@@ -29,8 +29,16 @@ jobs:
         - NXF_VER: ""
           NXF_EDGE: "1"
         parameters:
-          - "--longread_clip false"
-          - "--shortread_clip false"
           - "--shortread_clipmerge_tool fastp"
+          - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+          - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
           - "--shortread_clipmerge_tool adapterremoval"
+          - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+          - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
+          - "--shortread_complexityfilter_tool bbduk"
+          - "--shortread_complexityfilter_tool prinseq"
     steps:
       - name: Check out pipeline code

View file

@@ -18,21 +18,27 @@
 - [fastp](https://doi.org/10.1093/bioinformatics/bty560)

   > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.

 - [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)

   > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.

 - [Porechop](https://github.com/rrwick/Porechop)

+- [BBTools](http://sourceforge.net/projects/bbmap/)
+
+- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
+
+  > Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
+
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)

-  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
+  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.

 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)

   > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.

 - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)

View file

@@ -12,12 +12,6 @@
 process {

-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
-
     withName: SAMPLESHEET_CHECK {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
@@ -34,20 +28,12 @@ process {
         ]
     }

-    withName: UNTAR {
-        publishDir = [
-            path: { "${params.outdir}/databases" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
     withName: FASTQC {
         ext.args = '--quiet'
         ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
         publishDir = [
             path: { "${params.outdir}/fastqc/raw" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
             pattern: '*.html'
         ]
     }
@@ -57,7 +43,7 @@ process {
         ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
         publishDir = [
             path: { "${params.outdir}/fastqc/processed" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
             pattern: '*.html'
         ]
     }
@@ -73,8 +59,9 @@ process {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/fastp" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            mode: params.publish_dir_mode,
+            pattern: '*.fastq.gz',
+            enabled: params.save_preprocessed_reads
         ]
     }
@@ -92,8 +79,9 @@ process {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/fastp" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            mode: params.publish_dir_mode,
+            pattern: '*.fastq.gz',
+            enabled: params.save_preprocessed_reads
         ]
     }
@@ -108,8 +96,9 @@ process {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/adapterremoval" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            mode: params.publish_dir_mode,
+            pattern: '*.fastq.gz',
+            enabled: params.save_preprocessed_reads
         ]
     }
@@ -127,26 +116,48 @@ process {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/adapterremoval" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            mode: params.publish_dir_mode,
+            pattern: '*.fastq.gz',
+            enabled: params.save_preprocessed_reads
         ]
     }

     withName: PORECHOP {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/porechop" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            mode: params.publish_dir_mode,
+            pattern: '*.fastq.gz',
+            enabled: params.save_preprocessed_reads
         ]
     }

-    withName: CAT_FASTQ {
+    withName: BBMAP_BBDUK {
+        ext.args = [
+            "entropy=${params.shortread_complexityfilter_entropy}",
+            "entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
+            params.shortread_complexityfilter_bbduk_mask ? "entropymask=t" : "entropymask=f"
+        ].join(' ').trim()
+        ext.prefix = { "${meta.id}-${meta.run_accession}" }
         publishDir = [
-            path: { "${params.outdir}/prepared_sequences" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            path: { "${params.outdir}/bbduk/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{fastq.gz,log}',
+            enabled: params.save_complexityfiltered_reads
+        ]
+    }
+
+    withName: PRINSEQPLUSPLUS {
+        ext.args = [
+            params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
+            "-trim_qual_left=0 -trim_qual_left=0 -trim_qual_window=0 -trim_qual_step=0"
+        ].join(' ').trim()
+        ext.prefix = { "${meta.id}-${meta.run_accession}" }
+        publishDir = [
+            path: { "${params.outdir}/prinseqplusplus/" },
+            mode: params.publish_dir_mode,
+            pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}',
+            enabled: params.save_complexityfiltered_reads
         ]
     }
@@ -155,7 +166,7 @@ process {
         ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/malt/${meta.db_name}" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
             pattern: '*.{rma6,tab,text,sam,log}'
         ]
     }
@@ -165,7 +176,7 @@ process {
         ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
             pattern: '*.{fastq.gz,txt}'
         ]
     }
@@ -187,10 +198,10 @@ process {
         ]
     }

-    withName: CENTRIFUGE {
+    withName: CENTRIFUGE_CENTRIFUGE {
         publishDir = [
             path: { "${params.outdir}/centrifuge/${meta.db_name}" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
             pattern: '*.{fastq.gz,txt}'
         ]
         ext.args = { "${meta.db_params}" }

View file

@@ -27,7 +27,8 @@ params {
     run_kraken2                = true
     run_malt                   = true
     run_metaphlan3             = true
-    shortread_clipmerge        = true
     run_centrifuge             = true
+    shortread_clipmerge        = true
+    longread_clip              = false
+    shortread_complexityfilter = true
 }

View file

@@ -4,11 +4,17 @@
     "repos": {
         "nf-core/modules": {
             "adapterremoval": {
-                "git_sha": "f0800157544a82ae222931764483331a81812012"
+                "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
+            },
+            "bbmap/bbduk": {
+                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
             "cat/fastq": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
+            "centrifuge/centrifuge": {
+                "git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
+            },
             "custom/dumpsoftwareversions": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
@@ -33,8 +39,8 @@
             "porechop": {
                 "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
             },
-            "centrifuge": {
-                "git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
+            "prinseqplusplus": {
+                "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
             },
             "untar": {
                 "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"

View file

@@ -12,14 +12,13 @@ process ADAPTERREMOVAL {
     path(adapterlist)

     output:
-    tuple val(meta), path("${prefix}.truncated.gz")           , optional: true, emit: singles_truncated
-    tuple val(meta), path("${prefix}.discarded.gz")           , optional: true, emit: discarded
-    tuple val(meta), path("${prefix}.pair1.truncated.gz")     , optional: true, emit: pair1_truncated
-    tuple val(meta), path("${prefix}.pair2.truncated.gz")     , optional: true, emit: pair2_truncated
-    tuple val(meta), path("${prefix}.collapsed.gz")           , optional: true, emit: collapsed
-    tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
-    tuple val(meta), path("${prefix}.paired.gz")              , optional: true, emit: paired_interleaved
-    tuple val(meta), path('*.log')                            , emit: log
+    tuple val(meta), path("${prefix}.truncated.fastq.gz")           , optional: true, emit: singles_truncated
+    tuple val(meta), path("${prefix}.discarded.fastq.gz")           , optional: true, emit: discarded
+    tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
+    tuple val(meta), path("${prefix}.collapsed.fastq.gz")           , optional: true, emit: collapsed
+    tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
+    tuple val(meta), path("${prefix}.paired.fastq.gz")              , optional: true, emit: paired_interleaved
+    tuple val(meta), path('*.settings')                             , emit: settings
     path "versions.yml"                                             , emit: versions

     when:
@@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
         $adapterlist \\
         --basename ${prefix} \\
         --threads ${task.cpus} \\
-        --settings ${prefix}.log \\
         --seed 42 \\
         --gzip

+    ensure_fastq() {
+        if [ -f "\${1}" ]; then
+            mv "\${1}" "\${1::-3}.fastq.gz"
+        fi
+    }
+
+    ensure_fastq '${prefix}.truncated.gz'
+    ensure_fastq '${prefix}.discarded.gz'
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
@@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
         $adapterlist \\
         --basename ${prefix} \\
         --threads $task.cpus \\
-        --settings ${prefix}.log \\
         --seed 42 \\
         --gzip

+    ensure_fastq() {
+        if [ -f "\${1}" ]; then
+            mv "\${1}" "\${1::-3}.fastq.gz"
+        fi
+    }
+
+    ensure_fastq '${prefix}.truncated.gz'
+    ensure_fastq '${prefix}.discarded.gz'
+    ensure_fastq '${prefix}.pair1.truncated.gz'
+    ensure_fastq '${prefix}.pair2.truncated.gz'
+    ensure_fastq '${prefix}.collapsed.gz'
+    ensure_fastq '${prefix}.collapsed.truncated.gz'
+    ensure_fastq '${prefix}.paired.gz'
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
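An aside on the renaming helper above (not part of the diff): bash `\${1::-3}` strips the last three characters, i.e. the trailing `.gz`, before re-appending `.fastq.gz`. The same transformation expressed in Groovy, as a quick check (the bash helper additionally verifies the file exists before renaming):

```groovy
// ensure_fastq renames e.g. prefix.pair1.truncated.gz
// to prefix.pair1.truncated.fastq.gz ("::-3" drops ".gz").
def ensureFastqName = { String name ->
    name.endsWith('.gz') ? name[0..-4] + '.fastq.gz' : name
}
assert ensureFastqName('sample.pair1.truncated.gz') == 'sample.pair1.truncated.fastq.gz'
```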

View file

@@ -43,43 +43,43 @@ output:
         Adapter trimmed FastQ files of either single-end reads, or singleton
         'orphaned' reads from merging of paired-end data (i.e., one of the pair
         was lost due to filtering thresholds).
-      pattern: "*.truncated.gz"
+      pattern: "*.truncated.fastq.gz"
   - discarded:
       type: file
       description: |
         Adapter trimmed FastQ files of reads that did not pass filtering
         thresholds.
-      pattern: "*.discarded.gz"
+      pattern: "*.discarded.fastq.gz"
   - pair1_truncated:
       type: file
       description: |
         Adapter trimmed R1 FastQ files of paired-end reads that did not merge
         with their respective R2 pair due to long templates. The respective pair
         is stored in 'pair2_truncated'.
-      pattern: "*.pair1.truncated.gz"
+      pattern: "*.pair1.truncated.fastq.gz"
   - pair2_truncated:
       type: file
       description: |
         Adapter trimmed R2 FastQ files of paired-end reads that did not merge
         with their respective R1 pair due to long templates. The respective pair
         is stored in 'pair1_truncated'.
-      pattern: "*.pair2.truncated.gz"
+      pattern: "*.pair2.truncated.fastq.gz"
   - collapsed:
       type: file
       description: |
         Collapsed FastQ of paired-end reads that successfully merged with their
         respective R1 pair but were not trimmed.
-      pattern: "*.collapsed.gz"
+      pattern: "*.collapsed.fastq.gz"
   - collapsed_truncated:
       type: file
       description: |
         Collapsed FastQ of paired-end reads that successfully merged with their
         respective R1 pair and were trimmed of adapter due to sufficient overlap.
-      pattern: "*.collapsed.truncated.gz"
+      pattern: "*.collapsed.truncated.fastq.gz"
   - log:
       type: file
       description: AdapterRemoval log file
-      pattern: "*.log"
+      pattern: "*.settings"
   - versions:
       type: file
       description: File containing software versions

View file

@@ -0,0 +1,43 @@
process BBMAP_BBDUK {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
        'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"

    input:
    tuple val(meta), path(reads)
    path contaminants

    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads
    tuple val(meta), path('*.log')     , emit: log
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def raw     = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
    def trimmed = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
    def contaminants_fa = contaminants ? "ref=$contaminants" : ''
    """
    maxmem=\$(echo \"$task.memory\"| sed 's/ GB/g/g')
    bbduk.sh \\
        -Xmx\$maxmem \\
        $raw \\
        $trimmed \\
        threads=$task.cpus \\
        $args \\
        $contaminants_fa \\
        &> ${prefix}.bbduk.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh)
    END_VERSIONS
    """
}
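For context (not part of the commit): a minimal wiring sketch with a hypothetical input file name. The real call site is the SHORTREAD_COMPLEXITYFILTERING subworkflow further below, which passes an empty list as the contaminants reference and supplies the entropy options via `ext.args` from conf/modules.config.

```nextflow
// Minimal sketch (hypothetical file name), mirroring how the
// SHORTREAD_COMPLEXITYFILTERING subworkflow calls this module.
include { BBMAP_BBDUK } from './modules/nf-core/modules/bbmap/bbduk/main'

workflow {
    ch_reads = Channel.of(
        [ [ id:'test', single_end:true ], [ file('test_1.fastq.gz') ] ]
    )
    BBMAP_BBDUK ( ch_reads, [] ) // second input: no contaminant reference FASTA
    BBMAP_BBDUK.out.reads.view()
}
```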

View file

@@ -0,0 +1,52 @@
name: bbmap_bbduk
description: Adapter and quality trimming of sequencing reads
keywords:
  - trimming
  - adapter trimming
  - quality trimming
tools:
  - bbmap:
      description: BBMap is a short read aligner, as well as various other bioinformatic tools.
      homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
      documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
      tool_dev_url: None
      doi: ""
      licence: ["UC-LBL license (see package)"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - contaminants:
      type: file
      description: |
        Reference files containing adapter and/or contaminant sequences for sequence kmer matching
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: The trimmed/modified fastq reads
      pattern: "*fastq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - log:
      type: file
      description: Bbduk log file
      pattern: "*bbduk.log"
authors:
  - "@MGordon09"

View file

@@ -0,0 +1,61 @@
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
    centrifuge \\
        -x \$db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
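For context (not part of the commit): a minimal wiring sketch with hypothetical paths. The argument order matches the updated call in workflows/taxprofiler.nf below: reads, database directory, then the save_unaligned/save_aligned/sam_format flags. Because the module now locates the `*.1.cf` index inside the directory itself, the separate db_name input of the old CENTRIFUGE module is gone.

```nextflow
// Minimal sketch (hypothetical paths), matching the new call signature.
include { CENTRIFUGE_CENTRIFUGE } from './modules/nf-core/modules/centrifuge/centrifuge/main'

workflow {
    ch_reads = Channel.of(
        [ [ id:'test', single_end:true ], file('test.fastq.gz') ]
    )
    CENTRIFUGE_CENTRIFUGE ( ch_reads, file('centrifuge_db'), false, false, false )
    CENTRIFUGE_CENTRIFUGE.out.report.view()
}
```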

View file

@@ -0,0 +1,66 @@
name: centrifuge_centrifuge
description: Classifies metagenomic sequence data
keywords:
  - classify
  - metagenomics
  - fastq
  - db
tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*.{report.txt}"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*.{results.txt}"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"

View file

@@ -10,7 +10,6 @@ process CENTRIFUGE {
     input:
     tuple val(meta), path(reads)
     path db
-    val db_name
     val save_unaligned
     val save_aligned
     val sam_format
@@ -43,8 +42,9 @@ process CENTRIFUGE {
     }
     def sam_output = sam_format ? "--out-fmt 'sam'" : ''
     """
+    tar -xf $db
     centrifuge \\
-        -x ${db}/${db_name} \\
+        -x $db_name \\
         -p $task.cpus \\
         $paired \\
         --report-file ${prefix}.report.txt \\

View file

@@ -27,9 +27,6 @@ input:
       type: directory
       description: Centrifuge database in .tar.gz format
      pattern: "*.tar.gz"
-  - db_name:
-      type: string
-      description: Centrifuge database filenames without the suffix ".cf"
   - save_unaligned:
       type: value
       description: If true unmapped fastq files are saved

View file

@@ -0,0 +1,61 @@
process PRINSEQPLUSPLUS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
        'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*_good_out*.fastq.gz")  , emit: good_reads
    tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
    tuple val(meta), path("*_bad_out*.fastq.gz")   , optional: true, emit: bad_reads
    tuple val(meta), path("*.log")                 , emit: log
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    if (meta.single_end) {
        """
        prinseq++ \\
            -threads $task.cpus \\
            -fastq ${reads} \\
            -out_name ${prefix} \\
            -out_gz \\
            -VERBOSE 1 \\
            $args \\
            | tee ${prefix}.log

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
        END_VERSIONS
        """
    } else {
        """
        prinseq++ \\
            -threads $task.cpus \\
            -fastq ${reads[0]} \\
            -fastq2 ${reads[1]} \\
            -out_name ${prefix} \\
            -out_gz \\
            -VERBOSE 1 \\
            $args \\
            | tee ${prefix}.log

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
        END_VERSIONS
        """
    }
}
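For context (not part of the commit): a minimal wiring sketch with hypothetical file names, matching the call in SHORTREAD_COMPLEXITYFILTERING below. The `-lc_entropy`/`-lc_dust` filtering options arrive through `ext.args` (see conf/modules.config above), not as process inputs.

```nextflow
// Minimal sketch (hypothetical file names) of a paired-end invocation.
include { PRINSEQPLUSPLUS } from './modules/nf-core/modules/prinseqplusplus/main'

workflow {
    ch_reads = Channel.of(
        [ [ id:'test', single_end:false ],
          [ file('test_1.fastq.gz'), file('test_2.fastq.gz') ] ]
    )
    PRINSEQPLUSPLUS ( ch_reads )
    PRINSEQPLUSPLUS.out.good_reads.view()
}
```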

View file

@@ -0,0 +1,60 @@
name: "prinseqplusplus"
description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
keywords:
  - fastq
  - fasta
  - filter
  - trim
tools:
  - "prinseqplusplus":
      description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
      homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      doi: "10.7287/peerj.preprints.27553v1"
      licence: "['GPL v2']"
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end
        data, respectively.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - good_reads:
      type: file
      description: Reads passing filter(s) in gzipped FASTQ format
      pattern: "*_good_out_{R1,R2}.fastq.gz"
  - single_reads:
      type: file
      description: |
        Single reads without the pair passing filter(s) in gzipped FASTQ format
      pattern: "*_single_out_{R1,R2}.fastq.gz"
  - bad_reads:
      type: file
      description: |
        Reads not passing filter(s) in gzipped FASTQ format
      pattern: "*_bad_out_{R1,R2}.fastq.gz"
  - log:
      type: file
      description: |
        Verbose level 2 STDOUT information in a log file
      pattern: "*.log"
authors:
  - "@jfy133"

View file

@@ -51,7 +51,7 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'

-    // Databaess
+    // Databases
     databases                  = null

     // FASTQ preprocessing
@@ -64,6 +64,18 @@ params {
     shortread_clipmerge_adapter2  = null
     shortread_clipmerge_minlength = 15
     longread_clip                 = false
+    save_preprocessed_reads       = false
+
+    // Complexity filtering
+    shortread_complexityfilter                           = false
+    shortread_complexityfilter_tool                      = 'bbduk'
+    shortread_complexityfilter_entropy                   = 0.3
+    shortread_complexityfilter_bbduk_windowsize          = 50
+    shortread_complexityfilter_bbduk_mask                = false
+    shortread_complexityfilter_prinseqplusplus_mode      = 'entropy'
+    shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
+    save_complexityfiltered_reads                        = false

     // MALT
     run_malt                      = false

View file

@@ -276,8 +276,7 @@
             "type": "boolean"
         },
         "shortread_clipmerge_excludeunmerged": {
-            "type": "boolean",
-            "default": false
+            "type": "boolean"
         },
         "longread_clip": {
             "type": "boolean"
@@ -334,9 +333,40 @@
             "type": "integer",
             "default": 15
         },
-        "centrifuge_db_name": {
-            "type": "string",
-            "default": null
+        "save_preprocessed_reads": {
+            "type": "boolean",
+            "default": false
+        },
+        "shortread_complexityfilter_tool": {
+            "type": "string",
+            "default": "bbduk"
+        },
+        "shortread_complexityfilter_bbduk_windowsize": {
+            "type": "integer",
+            "default": 50
+        },
+        "shortread_complexityfilter_bbduk_mask": {
+            "type": "boolean"
+        },
+        "shortread_complexityfilter": {
+            "type": "boolean"
+        },
+        "shortread_complexityfilter_entropy": {
+            "type": "number",
+            "default": 0.3
+        },
+        "shortread_complexityfilter_prinseqplusplus_mode": {
+            "type": "string",
+            "default": "entropy",
+            "enum": ["entropy", "dust"]
+        },
+        "shortread_complexityfilter_prinseqplusplus_dustscore": {
+            "type": "number",
+            "default": 0.5
+        },
+        "save_complexityfiltered_reads": {
+            "type": "boolean",
+            "default": false
         }
     }
 }

View file

@@ -1,6 +1,6 @@
-/*
-    Process long raw reads with porechop
-*/
+//
+// Process long raw reads with porechop
+//

 include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
 include { PORECHOP                   } from '../../modules/nf-core/modules/porechop/main'
@@ -25,7 +25,7 @@ workflow LONGREAD_PREPROCESSING {
     FASTQC_PROCESSED ( PORECHOP.out.reads )
     ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
-    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

     emit:

View file

@@ -1,15 +1,10 @@
-/*
-    Process short raw reads with AdapterRemoval
-*/
+//
+// Process short raw reads with AdapterRemoval
+//

 include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
 include { CAT_FASTQ                               } from '../../modules/nf-core/modules/cat/fastq/main'
-include {
-    ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
-    ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
-    ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
-} from '../../modules/local/ensure_fastq_extension'

 workflow SHORTREAD_ADAPTERREMOVAL {
@@ -36,89 +31,63 @@ workflow SHORTREAD_ADAPTERREMOVAL {
     if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
-        ENSURE_FASTQ_EXTENSION1(
-            Channel.empty().mix(
+        ch_concat_fastq = Channel.empty()
+            .mix(
                 ADAPTERREMOVAL_PAIRED.out.collapsed,
                 ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
                 ADAPTERREMOVAL_PAIRED.out.singles_truncated,
-                ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
-                ADAPTERREMOVAL_PAIRED.out.pair2_truncated
+                ADAPTERREMOVAL_PAIRED.out.paired_truncated
             )
             .map { meta, reads ->
-                meta.single_end = true
-                [meta, reads]
+                def meta_new = meta.clone()
+                meta_new.single_end = true
+                [meta_new, reads]
             }
-        )
-
-        CAT_FASTQ(
-            ENSURE_FASTQ_EXTENSION1.out.reads
-                .groupTuple()
-        )
+            .groupTuple()
+            // Paired-end reads cause a nested tuple during grouping.
+            // We want to present a flat list of files to `CAT_FASTQ`.
+            .map { meta, fastq -> [meta, fastq.flatten()] }

-        ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
+        CAT_FASTQ(ch_concat_fastq)

         ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
-            .mix(ENSURE_FASTQ_EXTENSION2.out.reads)
+            .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)

     } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
-        ENSURE_FASTQ_EXTENSION1(
-            Channel.empty().mix(
+        ch_concat_fastq = Channel.empty()
+            .mix(
                 ADAPTERREMOVAL_PAIRED.out.collapsed,
                 ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
             )
             .map { meta, reads ->
-                meta.single_end = true
-                [meta, reads]
+                def meta_new = meta.clone()
+                meta_new.single_end = true
+                [meta_new, reads]
             }
-        )
-
-        CAT_FASTQ(
-            ENSURE_FASTQ_EXTENSION1.out.reads
-                .groupTuple()
-        )
+            .groupTuple()
+            .map { meta, fastq -> [meta, fastq.flatten()] }

-        ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
+        CAT_FASTQ(ch_concat_fastq)

         ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
-            .mix(ENSURE_FASTQ_EXTENSION2.out.reads)
+            .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)

     } else {
-        ENSURE_FASTQ_EXTENSION1(
-            ADAPTERREMOVAL_PAIRED.out.pair1_truncated
-                .map { meta, reads ->
-                    meta.single_end = true
-                    [meta, reads]
-                }
-        )
-
-        ENSURE_FASTQ_EXTENSION2(
-            ADAPTERREMOVAL_PAIRED.out.pair2_truncated
-                .map { meta, reads ->
-                    meta.single_end = true
-                    [meta, reads]
-                }
-        )
-
-        ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
-
-        ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
-            .join(ENSURE_FASTQ_EXTENSION2.out.reads)
-            .groupTuple()
-            .map { meta, pair1, pair2 ->
-                meta.single_end = false
-                [ meta, [ pair1, pair2 ].flatten() ]
-            }
-            .mix(ENSURE_FASTQ_EXTENSION3.out.reads)
+        ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
+            .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
     }

     ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
     ch_multiqc_files = ch_multiqc_files.mix(
-        ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
-        ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
+        ADAPTERREMOVAL_PAIRED.out.settings,
+        ADAPTERREMOVAL_SINGLE.out.settings
     )

     emit:
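A note on the `flatten()` step above (not part of the diff): after `groupTuple`, the grouped FASTQ entries arrive as a list of lists, which `CAT_FASTQ` cannot consume directly. A small Groovy sketch of the reshaping, with hypothetical file names:

```groovy
// After groupTuple the per-sample files nest one level deep;
// the .map { meta, fastq -> [meta, fastq.flatten()] } step unnests them.
def grouped = [ [ id:'s1', single_end:true ],
                [ ['s1.collapsed.fastq.gz'], ['s1.collapsed.truncated.fastq.gz'] ] ]
def (meta, fastq) = grouped
assert fastq.flatten() == [ 's1.collapsed.fastq.gz', 's1.collapsed.truncated.fastq.gz' ]
```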

View file

@@ -0,0 +1,32 @@
//
// Check input samplesheet and get read channels
//

include { BBMAP_BBDUK     } from '../../modules/nf-core/modules/bbmap/bbduk/main'
include { PRINSEQPLUSPLUS } from '../../modules/nf-core/modules/prinseqplusplus/main'

workflow SHORTREAD_COMPLEXITYFILTERING {
    take:
    reads // [ [ meta ], [ reads ] ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
        ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
        ch_versions       = ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
        ch_multiqc_files  = ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
    } else if ( params.shortread_complexityfilter_tool == 'prinseqplusplus' ) {
        ch_filtered_reads = PRINSEQPLUSPLUS ( reads ).good_reads
        ch_versions       = ch_versions.mix( PRINSEQPLUSPLUS.out.versions.first() )
    } else {
        ch_filtered_reads = reads
    }

    emit:
    reads    = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions       // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}

View file

@@ -1,6 +1,6 @@
-/*
-    Process short raw reads with FastP
-*/
+//
+// Process short raw reads with FastP
+//

 include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
 include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
@@ -44,8 +44,8 @@ workflow SHORTREAD_FASTP {

     ch_processed_reads = ch_fastp_reads_prepped

-    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
-    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json )

     emit:
     reads    = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@@ -1,5 +1,5 @@
 //
-// Check input samplesheet and get read channels
+// Perform read trimming and merging
 //
@@ -9,7 +9,7 @@ include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules

 workflow SHORTREAD_PREPROCESSING {
     take:
-    reads // file: /path/to/samplesheet.csv
+    reads // [ [ meta ], [ reads ] ]

     main:
     ch_versions = Channel.empty()
@@ -29,7 +29,7 @@ workflow SHORTREAD_PREPROCESSING {

     FASTQC_PROCESSED ( ch_processed_reads )
     ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
-    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

     emit:
     reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@@ -17,7 +17,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
-if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"

 /*
@@ -43,6 +43,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { DB_CHECK                      } from '../subworkflows/local/db_check'
 include { SHORTREAD_PREPROCESSING       } from '../subworkflows/local/shortread_preprocessing'
 include { LONGREAD_PREPROCESSING        } from '../subworkflows/local/longread_preprocessing'
+include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -60,7 +61,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
 include { CAT_FASTQ             } from '../modules/nf-core/modules/cat/fastq/main'
 include { MALT_RUN              } from '../modules/nf-core/modules/malt/run/main'
 include { KRAKEN2_KRAKEN2       } from '../modules/nf-core/modules/kraken2/kraken2/main'
-include { CENTRIFUGE            } from '../modules/nf-core/modules/centrifuge/main'
+include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/centrifuge/main'
 include { METAPHLAN3            } from '../modules/nf-core/modules/metaphlan3/main'

 /*
@@ -91,7 +92,7 @@ workflow TAXPROFILER {
     /*
         MODULE: Run FastQC
     */
-    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
+    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )

     FASTQC (
         ch_input_for_fastqc
@@ -99,10 +100,6 @@ workflow TAXPROFILER {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())

-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
     /*
         SUBWORKFLOW: PERFORM PREPROCESSING
     */
@@ -115,17 +112,26 @@ workflow TAXPROFILER {
     if ( params.longread_clip ) {
         ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
             .map { it -> [ it[0], [it[1]] ] }
-        ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
     } else {
         ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
     }

+    /*
+        SUBWORKFLOW: COMPLEXITY FILTERING
+    */
+
+    if ( params.shortread_complexityfilter ) {
+        ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
+    } else {
+        ch_shortreads_filtered = ch_shortreads_preprocessed
+    }
+
     /*
         COMBINE READS WITH POSSIBLE DATABASES
     */

     // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
-    ch_input_for_profiling = ch_shortreads_preprocessed
+    ch_input_for_profiling = ch_shortreads_filtered
         .mix( ch_longreads_preprocessed )
         .combine(DB_CHECK.out.dbs)
         .branch {
@@ -179,7 +185,6 @@ workflow TAXPROFILER {
     // RUN PROFILING
     //
     ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
-        .dump(tag: "input_metaphlan3")
         .multiMap {
             it ->
                 reads: [it[0] + it[2], it[1]]
@@ -198,7 +203,7 @@ workflow TAXPROFILER {
     }

     if ( params.run_centrifuge ) {
-        CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_db_name, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
+        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
     }

     if ( params.run_metaphlan3 ) {
@@ -208,6 +213,12 @@ workflow TAXPROFILER {
     /*
         MODULE: MultiQC
     */
+
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
+
     workflow_summary    = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)
@@ -219,21 +230,30 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))

     if (params.shortread_clipmerge) {
-        ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_versions      = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }
+
     if (params.longread_clip) {
-        ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
+        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_versions      = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }
+
+    if (params.shortread_complexityfilter){
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_versions      = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
+    }
+
     if (params.run_kraken2) {
         ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
     }
+
     if (params.run_malt) {
         ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
     }

+    // TODO MALT results overwriting per database?
     // TODO Versions for Karken/MALT not report?
     // TODO create multiQC module for metaphlan
     MULTIQC (