mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 16:19:54 +00:00
Check if PR works with centrifuge/centrifuge module
This commit is contained in:
commit
606e5da7d5
25 changed files with 763 additions and 179 deletions
8
.github/workflows/ci.yml
vendored
8
.github/workflows/ci.yml
vendored
|
@ -29,8 +29,16 @@ jobs:
|
||||||
- NXF_VER: ""
|
- NXF_VER: ""
|
||||||
NXF_EDGE: "1"
|
NXF_EDGE: "1"
|
||||||
parameters:
|
parameters:
|
||||||
|
- "--longread_clip false"
|
||||||
|
- "--shortread_clip false"
|
||||||
- "--shortread_clipmerge_tool fastp"
|
- "--shortread_clipmerge_tool fastp"
|
||||||
|
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
||||||
|
- "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
|
||||||
- "--shortread_clipmerge_tool adapterremoval"
|
- "--shortread_clipmerge_tool adapterremoval"
|
||||||
|
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
|
||||||
|
- "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
|
||||||
|
- "--shortread_complexityfilter_tool bbduk"
|
||||||
|
- "--shortread_complexityfilter_tool prinseq"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Check out pipeline code
|
- name: Check out pipeline code
|
||||||
|
|
14
CITATIONS.md
14
CITATIONS.md
|
@ -18,21 +18,27 @@
|
||||||
|
|
||||||
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
|
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
|
||||||
|
|
||||||
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
|
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
|
||||||
|
|
||||||
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
|
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
|
||||||
|
|
||||||
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
|
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
|
||||||
|
|
||||||
- [Porechop](https://github.com/rrwick/Porechop)
|
- [Porechop](https://github.com/rrwick/Porechop)
|
||||||
|
|
||||||
|
- [BBTools](http://sourceforge.net/projects/bbmap/)
|
||||||
|
|
||||||
|
- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
|
||||||
|
|
||||||
|
> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
|
||||||
|
|
||||||
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
|
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
|
||||||
|
|
||||||
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
||||||
|
|
||||||
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
|
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
|
||||||
|
|
||||||
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||||
|
|
||||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||||
|
|
||||||
|
|
|
@ -12,12 +12,6 @@
|
||||||
|
|
||||||
process {
|
process {
|
||||||
|
|
||||||
publishDir = [
|
|
||||||
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
|
|
||||||
mode: params.publish_dir_mode,
|
|
||||||
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
|
|
||||||
]
|
|
||||||
|
|
||||||
withName: SAMPLESHEET_CHECK {
|
withName: SAMPLESHEET_CHECK {
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/pipeline_info" },
|
path: { "${params.outdir}/pipeline_info" },
|
||||||
|
@ -34,20 +28,12 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: UNTAR {
|
|
||||||
publishDir = [
|
|
||||||
path: { "${params.outdir}/databases" },
|
|
||||||
mode: params.publish_dir_mode,
|
|
||||||
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
withName: FASTQC {
|
withName: FASTQC {
|
||||||
ext.args = '--quiet'
|
ext.args = '--quiet'
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/fastqc/raw" },
|
path: { "${params.outdir}/fastqc/raw" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.html'
|
pattern: '*.html'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -57,7 +43,7 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/fastqc/processed" },
|
path: { "${params.outdir}/fastqc/processed" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.html'
|
pattern: '*.html'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -73,8 +59,9 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/fastp" },
|
path: { "${params.outdir}/fastp" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,8 +79,9 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/fastp" },
|
path: { "${params.outdir}/fastp" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,8 +96,9 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/adapterremoval" },
|
path: { "${params.outdir}/adapterremoval" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,26 +116,48 @@ process {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/adapterremoval" },
|
path: { "${params.outdir}/adapterremoval" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
withName: PORECHOP {
|
withName: PORECHOP {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/porechop" },
|
path: { "${params.outdir}/porechop" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.fastq.gz',
|
||||||
|
enabled: params.save_preprocessed_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: CAT_FASTQ {
|
withName: BBMAP_BBDUK {
|
||||||
|
ext.args = [
|
||||||
|
"entropy=${params.shortread_complexityfilter_entropy}",
|
||||||
|
"entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
|
||||||
|
params.shortread_complexityfilter_bbduk_mask ? "entropymask=t" : "entropymask=f"
|
||||||
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}-${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/prepared_sequences" },
|
path: { "${params.outdir}/bbduk/" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.fastq.gz'
|
pattern: '*.{fastq.gz,log}',
|
||||||
|
enabled: params.save_complexityfiltered_reads
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: PRINSEQPLUSPLUS {
|
||||||
|
ext.args = [
|
||||||
|
params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
|
||||||
|
"-trim_qual_left=0 -trim_qual_left=0 -trim_qual_window=0 -trim_qual_step=0"
|
||||||
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}-${meta.run_accession}" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/prinseqplusplus/" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}',
|
||||||
|
enabled: params.save_complexityfiltered_reads
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,7 +166,7 @@ process {
|
||||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.{rma6,tab,text,sam,log}'
|
pattern: '*.{rma6,tab,text,sam,log}'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -165,7 +176,7 @@ process {
|
||||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/kraken2/${meta.db_name}" },
|
path: { "${params.outdir}/kraken2/${meta.db_name}" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.{fastq.gz,txt}'
|
pattern: '*.{fastq.gz,txt}'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -187,10 +198,10 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: CENTRIFUGE {
|
withName: CENTRIFUGE_CENTRIFUGE {
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
|
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
|
||||||
mode: 'copy',
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.{fastq.gz,txt}'
|
pattern: '*.{fastq.gz,txt}'
|
||||||
]
|
]
|
||||||
ext.args = { "${meta.db_params}" }
|
ext.args = { "${meta.db_params}" }
|
||||||
|
|
|
@ -27,7 +27,8 @@ params {
|
||||||
run_kraken2 = true
|
run_kraken2 = true
|
||||||
run_malt = true
|
run_malt = true
|
||||||
run_metaphlan3 = true
|
run_metaphlan3 = true
|
||||||
shortread_clipmerge = true
|
|
||||||
run_centrifuge = true
|
run_centrifuge = true
|
||||||
|
shortread_clipmerge = true
|
||||||
|
longread_clip = false
|
||||||
|
shortread_complexityfilter = true
|
||||||
}
|
}
|
||||||
|
|
12
modules.json
12
modules.json
|
@ -4,11 +4,17 @@
|
||||||
"repos": {
|
"repos": {
|
||||||
"nf-core/modules": {
|
"nf-core/modules": {
|
||||||
"adapterremoval": {
|
"adapterremoval": {
|
||||||
"git_sha": "f0800157544a82ae222931764483331a81812012"
|
"git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a"
|
||||||
|
},
|
||||||
|
"bbmap/bbduk": {
|
||||||
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
"cat/fastq": {
|
"cat/fastq": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
"centrifuge/centrifuge": {
|
||||||
|
"git_sha": "d2726fcf75063960f06b36d2229a4c0966614108"
|
||||||
|
},
|
||||||
"custom/dumpsoftwareversions": {
|
"custom/dumpsoftwareversions": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
@ -33,8 +39,8 @@
|
||||||
"porechop": {
|
"porechop": {
|
||||||
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
||||||
},
|
},
|
||||||
"centrifuge": {
|
"prinseqplusplus": {
|
||||||
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
|
"git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
|
||||||
},
|
},
|
||||||
"untar": {
|
"untar": {
|
||||||
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
||||||
|
|
42
modules/nf-core/modules/adapterremoval/main.nf
generated
42
modules/nf-core/modules/adapterremoval/main.nf
generated
|
@ -12,14 +12,13 @@ process ADAPTERREMOVAL {
|
||||||
path(adapterlist)
|
path(adapterlist)
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated
|
tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated
|
||||||
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded
|
tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded
|
||||||
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated
|
tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated
|
||||||
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated
|
tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed
|
||||||
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed
|
tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated
|
||||||
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
|
tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved
|
||||||
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved
|
tuple val(meta), path('*.settings') , emit: settings
|
||||||
tuple val(meta), path('*.log') , emit: log
|
|
||||||
path "versions.yml" , emit: versions
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
when:
|
when:
|
||||||
|
@ -38,10 +37,19 @@ process ADAPTERREMOVAL {
|
||||||
$adapterlist \\
|
$adapterlist \\
|
||||||
--basename ${prefix} \\
|
--basename ${prefix} \\
|
||||||
--threads ${task.cpus} \\
|
--threads ${task.cpus} \\
|
||||||
--settings ${prefix}.log \\
|
|
||||||
--seed 42 \\
|
--seed 42 \\
|
||||||
--gzip
|
--gzip
|
||||||
|
|
||||||
|
ensure_fastq() {
|
||||||
|
if [ -f "\${1}" ]; then
|
||||||
|
mv "\${1}" "\${1::-3}.fastq.gz"
|
||||||
|
fi
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure_fastq '${prefix}.truncated.gz'
|
||||||
|
ensure_fastq '${prefix}.discarded.gz'
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
"${task.process}":
|
"${task.process}":
|
||||||
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
|
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
|
||||||
|
@ -56,10 +64,24 @@ process ADAPTERREMOVAL {
|
||||||
$adapterlist \\
|
$adapterlist \\
|
||||||
--basename ${prefix} \\
|
--basename ${prefix} \\
|
||||||
--threads $task.cpus \\
|
--threads $task.cpus \\
|
||||||
--settings ${prefix}.log \\
|
|
||||||
--seed 42 \\
|
--seed 42 \\
|
||||||
--gzip
|
--gzip
|
||||||
|
|
||||||
|
ensure_fastq() {
|
||||||
|
if [ -f "\${1}" ]; then
|
||||||
|
mv "\${1}" "\${1::-3}.fastq.gz"
|
||||||
|
fi
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure_fastq '${prefix}.truncated.gz'
|
||||||
|
ensure_fastq '${prefix}.discarded.gz'
|
||||||
|
ensure_fastq '${prefix}.pair1.truncated.gz'
|
||||||
|
ensure_fastq '${prefix}.pair2.truncated.gz'
|
||||||
|
ensure_fastq '${prefix}.collapsed.gz'
|
||||||
|
ensure_fastq '${prefix}.collapsed.truncated.gz'
|
||||||
|
ensure_fastq '${prefix}.paired.gz'
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
"${task.process}":
|
"${task.process}":
|
||||||
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
|
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
|
||||||
|
|
14
modules/nf-core/modules/adapterremoval/meta.yml
generated
14
modules/nf-core/modules/adapterremoval/meta.yml
generated
|
@ -43,43 +43,43 @@ output:
|
||||||
Adapter trimmed FastQ files of either single-end reads, or singleton
|
Adapter trimmed FastQ files of either single-end reads, or singleton
|
||||||
'orphaned' reads from merging of paired-end data (i.e., one of the pair
|
'orphaned' reads from merging of paired-end data (i.e., one of the pair
|
||||||
was lost due to filtering thresholds).
|
was lost due to filtering thresholds).
|
||||||
pattern: "*.truncated.gz"
|
pattern: "*.truncated.fastq.gz"
|
||||||
- discarded:
|
- discarded:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Adapter trimmed FastQ files of reads that did not pass filtering
|
Adapter trimmed FastQ files of reads that did not pass filtering
|
||||||
thresholds.
|
thresholds.
|
||||||
pattern: "*.discarded.gz"
|
pattern: "*.discarded.fastq.gz"
|
||||||
- pair1_truncated:
|
- pair1_truncated:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
|
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
|
||||||
with their respective R2 pair due to long templates. The respective pair
|
with their respective R2 pair due to long templates. The respective pair
|
||||||
is stored in 'pair2_truncated'.
|
is stored in 'pair2_truncated'.
|
||||||
pattern: "*.pair1.truncated.gz"
|
pattern: "*.pair1.truncated.fastq.gz"
|
||||||
- pair2_truncated:
|
- pair2_truncated:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
|
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
|
||||||
with their respective R1 pair due to long templates. The respective pair
|
with their respective R1 pair due to long templates. The respective pair
|
||||||
is stored in 'pair1_truncated'.
|
is stored in 'pair1_truncated'.
|
||||||
pattern: "*.pair2.truncated.gz"
|
pattern: "*.pair2.truncated.fastq.gz"
|
||||||
- collapsed:
|
- collapsed:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Collapsed FastQ of paired-end reads that successfully merged with their
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
respective R1 pair but were not trimmed.
|
respective R1 pair but were not trimmed.
|
||||||
pattern: "*.collapsed.gz"
|
pattern: "*.collapsed.fastq.gz"
|
||||||
- collapsed_truncated:
|
- collapsed_truncated:
|
||||||
type: file
|
type: file
|
||||||
description: |
|
description: |
|
||||||
Collapsed FastQ of paired-end reads that successfully merged with their
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
respective R1 pair and were trimmed of adapter due to sufficient overlap.
|
respective R1 pair and were trimmed of adapter due to sufficient overlap.
|
||||||
pattern: "*.collapsed.truncated.gz"
|
pattern: "*.collapsed.truncated.fastq.gz"
|
||||||
- log:
|
- log:
|
||||||
type: file
|
type: file
|
||||||
description: AdapterRemoval log file
|
description: AdapterRemoval log file
|
||||||
pattern: "*.log"
|
pattern: "*.settings"
|
||||||
- versions:
|
- versions:
|
||||||
type: file
|
type: file
|
||||||
description: File containing software versions
|
description: File containing software versions
|
||||||
|
|
43
modules/nf-core/modules/bbmap/bbduk/main.nf
generated
Normal file
43
modules/nf-core/modules/bbmap/bbduk/main.nf
generated
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
// BBMAP_BBDUK: runs `bbduk.sh` on the reads of one sample.
//
// Inputs:
//   meta         - sample map; `meta.id` is used as the task tag and default
//                  output prefix, `meta.single_end` selects single/paired mode.
//   reads        - one FASTQ (single-end) or two FASTQs (paired-end).
//   contaminants - optional reference file; passed as `ref=` only when set.
//
// Outputs:
//   reads    - every `*.fastq.gz` written by the task.
//   log      - `<prefix>.bbduk.log`, capturing stdout and stderr of bbduk.sh.
//   versions - `versions.yml` with the output of `bbversion.sh`.
process BBMAP_BBDUK {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
        'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"

    input:
    tuple val(meta), path(reads)
    path contaminants

    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads
    tuple val(meta), path('*.log')     , emit: log
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def raw = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
    def trimmed = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
    def contaminants_fa = contaminants ? "ref=$contaminants" : ''
    // Build the JVM heap flag from the numeric memory value instead of
    // text-substituting the printed form of task.memory: the printed form is
    // only a valid -Xmx argument for whole-gigabyte values ("6 GB"), and breaks
    // for values such as "512 MB" or "1.5 GB". When no memory is configured the
    // flag is omitted so the tool falls back to its own default.
    def maxmem = task.memory ? "-Xmx${task.memory.toMega()}m" : ''
    """
    bbduk.sh \\
        $maxmem \\
        $raw \\
        $trimmed \\
        threads=$task.cpus \\
        $args \\
        $contaminants_fa \\
        &> ${prefix}.bbduk.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh)
    END_VERSIONS
    """
}
|
52
modules/nf-core/modules/bbmap/bbduk/meta.yml
generated
Normal file
52
modules/nf-core/modules/bbmap/bbduk/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
name: bbmap_bbduk
|
||||||
|
description: Adapter and quality trimming of sequencing reads
|
||||||
|
keywords:
|
||||||
|
- trimming
|
||||||
|
- adapter trimming
|
||||||
|
- quality trimming
|
||||||
|
tools:
|
||||||
|
- bbmap:
|
||||||
|
description: BBMap is a short read aligner that ships together with various other bioinformatic tools.
|
||||||
|
homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
|
||||||
|
documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
|
||||||
|
tool_dev_url: None
|
||||||
|
doi: ""
|
||||||
|
licence: ["UC-LBL license (see package)"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
- contaminants:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Reference files containing adapter and/or contaminant sequences for sequence kmer matching
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: The trimmed/modified fastq reads
|
||||||
|
pattern: "*fastq.gz"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: Bbduk log file
|
||||||
|
pattern: "*bbduk.log"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@MGordon09"
|
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
61
modules/nf-core/modules/centrifuge/centrifuge/main.nf
generated
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
// CENTRIFUGE_CENTRIFUGE: classifies the reads of one sample with `centrifuge`
// against an index located inside the staged `db` path.
//
// Inputs:
//   meta           - sample map; `meta.id` is the task tag and default prefix,
//                    `meta.single_end` selects single/paired mode.
//   reads          - one FASTQ (single-end) or two FASTQs (paired-end).
//   db             - path searched (following symlinks) for the `*.1.cf` index file.
//   save_unaligned - when true, adds `--un-gz` / `--un-conc-gz` output.
//   save_aligned   - when true, adds `--al-gz` / `--al-conc-gz` output.
//   sam_format     - when true, adds `--out-fmt 'sam'`.
//
// Outputs:
//   report, results              - `<prefix>.report.txt` and `<prefix>.results.txt`.
//   sam                          - optional `*.sam` files.
//   fastq_mapped, fastq_unmapped - optional read files, present only when the
//                                  matching save_* flag is set.
//   versions                     - `versions.yml` with the centrifuge version.
process CENTRIFUGE_CENTRIFUGE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"

    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format

    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    // Single-end and paired-end runs use different centrifuge flags for
    // writing aligned/unaligned reads.
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## '-not -name "._*"' skips macOS "._*" metadata files so only real index files match.
    ## The sed pattern is escaped and anchored so that only the trailing ".1.cf"
    ## suffix is removed, never a look-alike run elsewhere in the path.
    db_name=\$(find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1\\.cf\$//')
    centrifuge \\
        -x \$db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
}
|
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
66
modules/nf-core/modules/centrifuge/centrifuge/meta.yml
generated
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
name: centrifuge_centrifuge
|
||||||
|
description: Classifies metagenomic sequence data
|
||||||
|
keywords:
|
||||||
|
- classify
|
||||||
|
- metagenomics
|
||||||
|
- fastq
|
||||||
|
- db
|
||||||
|
tools:
|
||||||
|
- centrifuge:
|
||||||
|
description: Centrifuge is a classifier for metagenomic sequences.
|
||||||
|
homepage: https://ccb.jhu.edu/software/centrifuge/
|
||||||
|
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
|
||||||
|
doi: 10.1101/gr.210641.116
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
- db:
|
||||||
|
type: directory
|
||||||
|
description: Path to directory containing centrifuge database files
|
||||||
|
- save_unaligned:
|
||||||
|
type: value
|
||||||
|
description: If true unmapped fastq files are saved
|
||||||
|
- save_aligned:
|
||||||
|
type: value
|
||||||
|
description: If true mapped fastq files are saved
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- report:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
File containing a classification summary
|
||||||
|
pattern: "*.{report.txt}"
|
||||||
|
- results:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
File containing classification results
|
||||||
|
pattern: "*.{results.txt}"
|
||||||
|
- fastq_unmapped:
|
||||||
|
type: file
|
||||||
|
description: Unmapped fastq files
|
||||||
|
pattern: "*.unmapped.fastq.gz"
|
||||||
|
- fastq_mapped:
|
||||||
|
type: file
|
||||||
|
description: Mapped fastq files
|
||||||
|
pattern: "*.mapped.fastq.gz"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
authors:
|
||||||
|
- "@sofstam"
|
||||||
|
- "@jfy133"
|
||||||
|
- "@sateeshperi"
|
|
@ -10,7 +10,6 @@ process CENTRIFUGE {
|
||||||
input:
|
input:
|
||||||
tuple val(meta), path(reads)
|
tuple val(meta), path(reads)
|
||||||
path db
|
path db
|
||||||
val db_name
|
|
||||||
val save_unaligned
|
val save_unaligned
|
||||||
val save_aligned
|
val save_aligned
|
||||||
val sam_format
|
val sam_format
|
||||||
|
@ -43,8 +42,9 @@ process CENTRIFUGE {
|
||||||
}
|
}
|
||||||
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
|
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
|
||||||
"""
|
"""
|
||||||
|
tar -xf $db
|
||||||
centrifuge \\
|
centrifuge \\
|
||||||
-x ${db}/${db_name} \\
|
-x $db_name \\
|
||||||
-p $task.cpus \\
|
-p $task.cpus \\
|
||||||
$paired \\
|
$paired \\
|
||||||
--report-file ${prefix}.report.txt \\
|
--report-file ${prefix}.report.txt \\
|
|
@ -27,9 +27,6 @@ input:
|
||||||
type: directory
|
type: directory
|
||||||
description: Centrifuge database in .tar.gz format
|
description: Centrifuge database in .tar.gz format
|
||||||
pattern: "*.tar.gz"
|
pattern: "*.tar.gz"
|
||||||
- db_name:
|
|
||||||
type: string
|
|
||||||
description: Centrifuge database filenames without the suffix ".cf"
|
|
||||||
- save_unaligned:
|
- save_unaligned:
|
||||||
type: value
|
type: value
|
||||||
description: If true unmapped fastq files are saved
|
description: If true unmapped fastq files are saved
|
61
modules/nf-core/modules/prinseqplusplus/main.nf
generated
Normal file
61
modules/nf-core/modules/prinseqplusplus/main.nf
generated
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
process PRINSEQPLUSPLUS {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_low'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
|
||||||
|
'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
tuple val(meta), path(reads)
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path("*_good_out*.fastq.gz") , emit: good_reads
|
||||||
|
tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
|
||||||
|
tuple val(meta), path("*_bad_out*.fastq.gz") , optional: true, emit: bad_reads
|
||||||
|
tuple val(meta), path("*.log") , emit: log
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
|
||||||
|
if (meta.single_end) {
|
||||||
|
"""
|
||||||
|
prinseq++ \\
|
||||||
|
-threads $task.cpus \\
|
||||||
|
-fastq ${reads} \\
|
||||||
|
-out_name ${prefix} \\
|
||||||
|
-out_gz \\
|
||||||
|
-VERBOSE 1 \\
|
||||||
|
$args \\
|
||||||
|
| tee ${prefix}.log
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
} else {
|
||||||
|
"""
|
||||||
|
prinseq++ \\
|
||||||
|
-threads $task.cpus \\
|
||||||
|
-fastq ${reads[0]} \\
|
||||||
|
-fastq2 ${reads[1]} \\
|
||||||
|
-out_name ${prefix} \\
|
||||||
|
-out_gz \\
|
||||||
|
-VERBOSE 1 \\
|
||||||
|
$args \\
|
||||||
|
| tee ${prefix}.log
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
}
|
60
modules/nf-core/modules/prinseqplusplus/meta.yml
generated
Normal file
60
modules/nf-core/modules/prinseqplusplus/meta.yml
generated
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
name: "prinseqplusplus"
|
||||||
|
description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
|
||||||
|
keywords:
|
||||||
|
- fastq
|
||||||
|
- fasta
|
||||||
|
- filter
|
||||||
|
- trim
|
||||||
|
tools:
|
||||||
|
- "prinseqplusplus":
|
||||||
|
description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
|
||||||
|
homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
|
||||||
|
documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
|
||||||
|
tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
|
||||||
|
doi: "10.7287/peerj.preprints.27553v1"
|
||||||
|
licence: "['GPL v2']"
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end
|
||||||
|
data, respectively.
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- good_reads:
|
||||||
|
type: file
|
||||||
|
description: Reads passing filter(s) in gzipped FASTQ format
|
||||||
|
pattern: "*_good_out_{R1,R2}.fastq.gz"
|
||||||
|
- single_reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Single reads without the pair passing filter(s) in gzipped FASTQ format
|
||||||
|
pattern: "*_single_out_{R1,R2}.fastq.gz"
|
||||||
|
- bad_reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Reads not passing filter(s) in gzipped FASTQ format
|
||||||
|
pattern: "*_bad_out_{R1,R2}.fastq.gz"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Verbose level 1 STDOUT information in a log file
|
||||||
|
pattern: "*.log"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@jfy133"
|
|
@ -51,7 +51,7 @@ params {
|
||||||
max_cpus = 16
|
max_cpus = 16
|
||||||
max_time = '240.h'
|
max_time = '240.h'
|
||||||
|
|
||||||
// Databaess
|
// Databases
|
||||||
databases = null
|
databases = null
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
|
@ -64,6 +64,18 @@ params {
|
||||||
shortread_clipmerge_adapter2 = null
|
shortread_clipmerge_adapter2 = null
|
||||||
shortread_clipmerge_minlength = 15
|
shortread_clipmerge_minlength = 15
|
||||||
longread_clip = false
|
longread_clip = false
|
||||||
|
save_preprocessed_reads = false
|
||||||
|
|
||||||
|
// Complexity filtering
|
||||||
|
shortread_complexityfilter = false
|
||||||
|
shortread_complexityfilter_tool = 'bbduk'
|
||||||
|
shortread_complexityfilter_entropy = 0.3
|
||||||
|
shortread_complexityfilter_bbduk_windowsize = 50
|
||||||
|
shortread_complexityfilter_bbduk_mask = false
|
||||||
|
shortread_complexityfilter_prinseqplusplus_mode = 'entropy'
|
||||||
|
shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
|
||||||
|
save_complexityfiltered_reads = false
|
||||||
|
|
||||||
|
|
||||||
// MALT
|
// MALT
|
||||||
run_malt = false
|
run_malt = false
|
||||||
|
|
|
@ -276,8 +276,7 @@
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"shortread_clipmerge_excludeunmerged": {
|
"shortread_clipmerge_excludeunmerged": {
|
||||||
"type": "boolean",
|
"type": "boolean"
|
||||||
"default": false
|
|
||||||
},
|
},
|
||||||
"longread_clip": {
|
"longread_clip": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
@ -334,9 +333,40 @@
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 15
|
"default": 15
|
||||||
},
|
},
|
||||||
"centrifuge_db_name": {
|
"save_preprocessed_reads": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_tool": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": null
|
"default": "bbduk"
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_bbduk_windowsize": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 50
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_bbduk_mask": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_entropy": {
|
||||||
|
"type": "number",
|
||||||
|
"default": 0.3
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_prinseqplusplus_mode": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "entropy",
|
||||||
|
"enum": ["entropy", "dust"]
|
||||||
|
},
|
||||||
|
"shortread_complexityfilter_prinseqplusplus_dustscore": {
|
||||||
|
"type": "number",
|
||||||
|
"default": 0.5
|
||||||
|
},
|
||||||
|
"save_complexityfiltered_reads": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
61
nf-core/modules/centrifuge/centrifuge/main.nf
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
process CENTRIFUGE_CENTRIFUGE {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_high'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
|
||||||
|
'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
tuple val(meta), path(reads)
|
||||||
|
path db
|
||||||
|
val save_unaligned
|
||||||
|
val save_aligned
|
||||||
|
val sam_format
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path('*report.txt') , emit: report
|
||||||
|
tuple val(meta), path('*results.txt') , emit: results
|
||||||
|
tuple val(meta), path('*.sam') , optional: true, emit: sam
|
||||||
|
tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped
|
||||||
|
tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
|
||||||
|
def unaligned = ''
|
||||||
|
def aligned = ''
|
||||||
|
if (meta.single_end) {
|
||||||
|
unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
|
||||||
|
aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
|
||||||
|
} else {
|
||||||
|
unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
|
||||||
|
aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
|
||||||
|
}
|
||||||
|
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
|
||||||
|
"""
|
||||||
|
## we add -not -name "._*" to ensure Mac OSX metadata files aren't included
|
||||||
|
db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'`
|
||||||
|
centrifuge \\
|
||||||
|
-x \$db_name \\
|
||||||
|
-p $task.cpus \\
|
||||||
|
$paired \\
|
||||||
|
--report-file ${prefix}.report.txt \\
|
||||||
|
-S ${prefix}.results.txt \\
|
||||||
|
$unaligned \\
|
||||||
|
$aligned \\
|
||||||
|
$sam_output \\
|
||||||
|
$args
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //')
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
66
nf-core/modules/centrifuge/centrifuge/meta.yml
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
name: centrifuge_centrifuge
|
||||||
|
description: Classifies metagenomic sequence data
|
||||||
|
keywords:
|
||||||
|
- classify
|
||||||
|
- metagenomics
|
||||||
|
- fastq
|
||||||
|
- db
|
||||||
|
tools:
|
||||||
|
- centrifuge:
|
||||||
|
description: Centrifuge is a classifier for metagenomic sequences.
|
||||||
|
homepage: https://ccb.jhu.edu/software/centrifuge/
|
||||||
|
documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
|
||||||
|
doi: 10.1101/gr.210641.116
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
- db:
|
||||||
|
type: directory
|
||||||
|
description: Path to directory containing centrifuge database files
|
||||||
|
- save_unaligned:
|
||||||
|
type: value
|
||||||
|
description: If true unmapped fastq files are saved
|
||||||
|
- save_aligned:
|
||||||
|
type: value
|
||||||
|
description: If true mapped fastq files are saved
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- report:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
File containing a classification summary
|
||||||
|
pattern: "*.{report.txt}"
|
||||||
|
- results:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
File containing classification results
|
||||||
|
pattern: "*.{results.txt}"
|
||||||
|
- fastq_unmapped:
|
||||||
|
type: file
|
||||||
|
description: Unmapped fastq files
|
||||||
|
pattern: "*.unmapped.fastq.gz"
|
||||||
|
- fastq_mapped:
|
||||||
|
type: file
|
||||||
|
description: Mapped fastq files
|
||||||
|
pattern: "*.mapped.fastq.gz"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
authors:
|
||||||
|
- "@sofstam"
|
||||||
|
- "@jfy133"
|
||||||
|
- "@sateeshperi"
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
//
|
||||||
Process long raw reads with porechop
|
// Process long raw reads with porechop
|
||||||
*/
|
//
|
||||||
|
|
||||||
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||||
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
||||||
|
@ -25,7 +25,7 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
|
|
||||||
FASTQC_PROCESSED ( PORECHOP.out.reads )
|
FASTQC_PROCESSED ( PORECHOP.out.reads )
|
||||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
|
|
|
@ -1,15 +1,10 @@
|
||||||
/*
|
//
|
||||||
Process short raw reads with AdapterRemoval
|
// Process short raw reads with AdapterRemoval
|
||||||
*/
|
//
|
||||||
|
|
||||||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
||||||
include {
|
|
||||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
|
|
||||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
|
|
||||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
|
|
||||||
} from '../../modules/local/ensure_fastq_extension'
|
|
||||||
|
|
||||||
workflow SHORTREAD_ADAPTERREMOVAL {
|
workflow SHORTREAD_ADAPTERREMOVAL {
|
||||||
|
|
||||||
|
@ -36,89 +31,63 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
||||||
|
|
||||||
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
|
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION1(
|
ch_concat_fastq = Channel.empty()
|
||||||
Channel.empty().mix(
|
.mix(
|
||||||
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
||||||
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
|
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
|
||||||
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
|
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
|
||||||
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
|
ADAPTERREMOVAL_PAIRED.out.paired_truncated
|
||||||
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
|
|
||||||
)
|
)
|
||||||
.map { meta, reads ->
|
.map { meta, reads ->
|
||||||
meta.single_end = true
|
def meta_new = meta.clone()
|
||||||
[meta, reads]
|
meta_new.single_end = true
|
||||||
|
[meta_new, reads]
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
|
||||||
CAT_FASTQ(
|
|
||||||
ENSURE_FASTQ_EXTENSION1.out.reads
|
|
||||||
.groupTuple()
|
.groupTuple()
|
||||||
)
|
// Paired-end reads cause a nested tuple during grouping.
|
||||||
|
// We want to present a flat list of files to `CAT_FASTQ`.
|
||||||
|
.map { meta, fastq -> [meta, fastq.flatten()] }
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
|
||||||
|
CAT_FASTQ(ch_concat_fastq)
|
||||||
|
|
||||||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||||
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
|
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||||
|
|
||||||
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
|
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION1(
|
ch_concat_fastq = Channel.empty()
|
||||||
Channel.empty().mix(
|
.mix(
|
||||||
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
||||||
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
|
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
|
||||||
)
|
)
|
||||||
.map { meta, reads ->
|
.map { meta, reads ->
|
||||||
meta.single_end = true
|
def meta_new = meta.clone()
|
||||||
[meta, reads]
|
meta_new.single_end = true
|
||||||
|
[meta_new, reads]
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
|
||||||
CAT_FASTQ(
|
|
||||||
ENSURE_FASTQ_EXTENSION1.out.reads
|
|
||||||
.groupTuple()
|
.groupTuple()
|
||||||
)
|
.map { meta, fastq -> [meta, fastq.flatten()] }
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
|
||||||
|
CAT_FASTQ(ch_concat_fastq)
|
||||||
|
|
||||||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||||
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
|
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION1(
|
ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated
|
||||||
ADAPTERREMOVAL_PAIRED.out.pair1_truncated
|
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||||
.map { meta, reads ->
|
|
||||||
meta.single_end = true
|
|
||||||
[meta, reads]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION2(
|
|
||||||
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
|
|
||||||
.map { meta, reads ->
|
|
||||||
meta.single_end = true
|
|
||||||
[meta, reads]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
|
||||||
|
|
||||||
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
|
|
||||||
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
|
|
||||||
.groupTuple()
|
|
||||||
.map { meta, pair1, pair2 ->
|
|
||||||
meta.single_end = false
|
|
||||||
[ meta, [ pair1, pair2 ].flatten() ]
|
|
||||||
}
|
|
||||||
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
|
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
|
||||||
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
|
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
|
||||||
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(
|
ch_multiqc_files = ch_multiqc_files.mix(
|
||||||
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
|
ADAPTERREMOVAL_PAIRED.out.settings,
|
||||||
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
|
ADAPTERREMOVAL_SINGLE.out.settings
|
||||||
)
|
)
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
|
|
32
subworkflows/local/shortread_complexityfiltering.nf
Normal file
32
subworkflows/local/shortread_complexityfiltering.nf
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
//
|
||||||
|
// Perform complexity filtering of short reads with BBDuk or PRINSEQ++
|
||||||
|
//
|
||||||
|
|
||||||
|
include { BBMAP_BBDUK } from '../../modules/nf-core/modules/bbmap/bbduk/main'
|
||||||
|
include { PRINSEQPLUSPLUS } from '../../modules/nf-core/modules/prinseqplusplus/main'
|
||||||
|
|
||||||
|
workflow SHORTREAD_COMPLEXITYFILTERING {
|
||||||
|
take:
|
||||||
|
reads // [ [ meta ], [ reads ] ]
|
||||||
|
|
||||||
|
main:
|
||||||
|
ch_versions = Channel.empty()
|
||||||
|
ch_multiqc_files = Channel.empty()
|
||||||
|
|
||||||
|
if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
|
||||||
|
ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
|
||||||
|
ch_versions = ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
|
||||||
|
} else if ( params.shortread_complexityfilter_tool == 'prinseqplusplus' ) {
|
||||||
|
ch_filtered_reads = PRINSEQPLUSPLUS ( reads ).good_reads
|
||||||
|
ch_versions = ch_versions.mix( PRINSEQPLUSPLUS.out.versions.first() )
|
||||||
|
} else {
|
||||||
|
ch_filtered_reads = reads
|
||||||
|
}
|
||||||
|
|
||||||
|
emit:
|
||||||
|
reads = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
|
||||||
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
mqc = ch_multiqc_files
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
//
|
||||||
Process short raw reads with FastP
|
// Process short raw reads with FastP
|
||||||
*/
|
//
|
||||||
|
|
||||||
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
|
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
|
||||||
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
|
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
|
||||||
|
@ -44,8 +44,8 @@ workflow SHORTREAD_FASTP {
|
||||||
|
|
||||||
ch_processed_reads = ch_fastp_reads_prepped
|
ch_processed_reads = ch_fastp_reads_prepped
|
||||||
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json )
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//
|
//
|
||||||
// Check input samplesheet and get read channels
|
// Perform read trimming and merging
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules
|
||||||
|
|
||||||
workflow SHORTREAD_PREPROCESSING {
|
workflow SHORTREAD_PREPROCESSING {
|
||||||
take:
|
take:
|
||||||
reads // file: /path/to/samplesheet.csv
|
reads // [ [ meta ], [ reads ] ]
|
||||||
|
|
||||||
main:
|
main:
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
|
@ -29,7 +29,7 @@ workflow SHORTREAD_PREPROCESSING {
|
||||||
|
|
||||||
FASTQC_PROCESSED ( ch_processed_reads )
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
|
|
|
@ -17,7 +17,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
||||||
// Check mandatory parameters
|
// Check mandatory parameters
|
||||||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||||
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files."
|
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
||||||
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -43,6 +43,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
|
||||||
include { DB_CHECK } from '../subworkflows/local/db_check'
|
include { DB_CHECK } from '../subworkflows/local/db_check'
|
||||||
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
|
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
|
||||||
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
|
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
|
||||||
|
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -60,7 +61,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
||||||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
|
include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||||
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -91,7 +92,7 @@ workflow TAXPROFILER {
|
||||||
/*
|
/*
|
||||||
MODULE: Run FastQC
|
MODULE: Run FastQC
|
||||||
*/
|
*/
|
||||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
|
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||||
|
|
||||||
FASTQC (
|
FASTQC (
|
||||||
ch_input_for_fastqc
|
ch_input_for_fastqc
|
||||||
|
@ -99,10 +100,6 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||||
|
|
||||||
CUSTOM_DUMPSOFTWAREVERSIONS (
|
|
||||||
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|
|
||||||
)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||||
*/
|
*/
|
||||||
|
@ -115,17 +112,26 @@ workflow TAXPROFILER {
|
||||||
if ( params.longread_clip ) {
|
if ( params.longread_clip ) {
|
||||||
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
||||||
.map { it -> [ it[0], [it[1]] ] }
|
.map { it -> [ it[0], [it[1]] ] }
|
||||||
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
|
|
||||||
} else {
|
} else {
|
||||||
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
|
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
SUBWORKFLOW: COMPLEXITY FILTERING
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ( params.shortread_complexityfilter ) {
|
||||||
|
ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
|
||||||
|
} else {
|
||||||
|
ch_shortreads_filtered = ch_shortreads_preprocessed
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
COMBINE READS WITH POSSIBLE DATABASES
|
COMBINE READS WITH POSSIBLE DATABASES
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
||||||
ch_input_for_profiling = ch_shortreads_preprocessed
|
ch_input_for_profiling = ch_shortreads_filtered
|
||||||
.mix( ch_longreads_preprocessed )
|
.mix( ch_longreads_preprocessed )
|
||||||
.combine(DB_CHECK.out.dbs)
|
.combine(DB_CHECK.out.dbs)
|
||||||
.branch {
|
.branch {
|
||||||
|
@ -179,7 +185,6 @@ workflow TAXPROFILER {
|
||||||
// RUN PROFILING
|
// RUN PROFILING
|
||||||
//
|
//
|
||||||
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||||
.dump(tag: "input_metaphlan3")
|
|
||||||
.multiMap {
|
.multiMap {
|
||||||
it ->
|
it ->
|
||||||
reads: [it[0] + it[2], it[1]]
|
reads: [it[0] + it[2], it[1]]
|
||||||
|
@ -198,7 +203,7 @@ workflow TAXPROFILER {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_centrifuge ) {
|
if ( params.run_centrifuge ) {
|
||||||
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_db_name, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_metaphlan3 ) {
|
if ( params.run_metaphlan3 ) {
|
||||||
|
@ -208,6 +213,12 @@ workflow TAXPROFILER {
|
||||||
/*
|
/*
|
||||||
MODULE: MultiQC
|
MODULE: MultiQC
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
CUSTOM_DUMPSOFTWAREVERSIONS (
|
||||||
|
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
|
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
|
||||||
ch_workflow_summary = Channel.value(workflow_summary)
|
ch_workflow_summary = Channel.value(workflow_summary)
|
||||||
|
|
||||||
|
@ -219,21 +230,30 @@ workflow TAXPROFILER {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||||
|
|
||||||
if (params.shortread_clipmerge) {
|
if (params.shortread_clipmerge) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
}
|
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
|
||||||
if (params.longread_clip) {
|
}
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
|
|
||||||
}
|
if (params.longread_clip) {
|
||||||
if (params.run_kraken2) {
|
ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
|
ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
|
||||||
ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())
|
}
|
||||||
}
|
|
||||||
if (params.run_malt) {
|
if (params.shortread_complexityfilter){
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(MALT_RUN.out.log.collect{it[1]}.ifEmpty([]))
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||||
ch_versions = ch_versions.mix(MALT_RUN.out.versions.first())
|
ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
|
||||||
|
}
|
||||||
|
|
||||||
|
if (params.run_kraken2) {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
|
||||||
|
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
|
||||||
|
}
|
||||||
|
|
||||||
|
if (params.run_malt) {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
|
||||||
|
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO MALT results overwriting per database?
|
|
||||||
// TODO Versions for Karken/MALT not report?
|
// TODO Versions for Karken/MALT not report?
|
||||||
// TODO create multiQC module for metaphlan
|
// TODO create multiQC module for metaphlan
|
||||||
MULTIQC (
|
MULTIQC (
|
||||||
|
|
Loading…
Reference in a new issue