mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 11:19:54 +00:00
Add centrifuge classification
This commit is contained in:
commit
d897c922b2
21 changed files with 751 additions and 159 deletions
14
.github/workflows/ci.yml
vendored
14
.github/workflows/ci.yml
vendored
|
@ -28,6 +28,10 @@ jobs:
|
||||||
# Test latest edge release of Nextflow
|
# Test latest edge release of Nextflow
|
||||||
- NXF_VER: ""
|
- NXF_VER: ""
|
||||||
NXF_EDGE: "1"
|
NXF_EDGE: "1"
|
||||||
|
parameters:
|
||||||
|
- "--shortread_clipmerge_tool fastp"
|
||||||
|
- "--shortread_clipmerge_tool adapterremoval"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Check out pipeline code
|
- name: Check out pipeline code
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
|
@ -42,11 +46,19 @@ jobs:
|
||||||
wget -qO- get.nextflow.io | bash
|
wget -qO- get.nextflow.io | bash
|
||||||
sudo mv nextflow /usr/local/bin/
|
sudo mv nextflow /usr/local/bin/
|
||||||
|
|
||||||
|
- name: Show current locale
|
||||||
|
run: locale
|
||||||
|
|
||||||
|
- name: Set UTF-8 enabled locale
|
||||||
|
run: |
|
||||||
|
sudo locale-gen en_US.UTF-8
|
||||||
|
sudo update-locale LANG=en_US.UTF-8
|
||||||
|
|
||||||
- name: Run pipeline with test data
|
- name: Run pipeline with test data
|
||||||
# TODO nf-core: You can customise CI pipeline run tests as required
|
# TODO nf-core: You can customise CI pipeline run tests as required
|
||||||
# For example: adding multiple test runs with different parameters
|
# For example: adding multiple test runs with different parameters
|
||||||
# Remember that you can parallelise this by using strategy.matrix
|
# Remember that you can parallelise this by using strategy.matrix
|
||||||
run: |
|
run: |
|
||||||
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
|
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
23
CITATIONS.md
23
CITATIONS.md
|
@ -13,9 +13,30 @@
|
||||||
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
|
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
|
||||||
|
|
||||||
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
|
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
|
||||||
|
|
||||||
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
|
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
|
||||||
|
|
||||||
* [Porechop](https://github.com/rrwick/Porechop)
|
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
|
||||||
|
|
||||||
|
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
|
||||||
|
|
||||||
|
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
|
||||||
|
|
||||||
|
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
|
||||||
|
|
||||||
|
- [Porechop](https://github.com/rrwick/Porechop)
|
||||||
|
|
||||||
|
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
|
||||||
|
|
||||||
|
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
||||||
|
|
||||||
|
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
|
||||||
|
|
||||||
|
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||||
|
|
||||||
|
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||||
|
|
||||||
|
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088.
|
||||||
|
|
||||||
## Software packaging/containerisation tools
|
## Software packaging/containerisation tools
|
||||||
|
|
||||||
|
|
|
@ -52,13 +52,25 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: FASTP {
|
withName: FASTQC_PROCESSED {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.args = '--quiet'
|
||||||
// TODO also include option to NOT merge
|
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/fastqc/processed" },
|
||||||
|
mode: 'copy',
|
||||||
|
pattern: '*.html'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: FASTP_SINGLE {
|
||||||
ext.args = [
|
ext.args = [
|
||||||
{ ${meta.single_end} } == 0 ? "-m" : '',
|
// trimming options
|
||||||
params.shortread_excludeunmerged ? '' : "--include_unmerged"
|
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||||
|
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||||
|
// filtering options
|
||||||
|
"--length_required ${params.shortread_clipmerge_minlength}"
|
||||||
].join(' ').trim()
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/fastp" },
|
path: { "${params.outdir}/fastp" },
|
||||||
mode: 'copy',
|
mode: 'copy',
|
||||||
|
@ -66,6 +78,61 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: FASTP_PAIRED {
|
||||||
|
ext.args = [
|
||||||
|
// collapsing options - option to retain singletons
|
||||||
|
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
||||||
|
// trimming options
|
||||||
|
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||||
|
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
|
||||||
|
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
|
||||||
|
// filtering options
|
||||||
|
"--length_required ${params.shortread_clipmerge_minlength}"
|
||||||
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/fastp" },
|
||||||
|
mode: 'copy',
|
||||||
|
pattern: '*.fastq.gz'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: ADAPTERREMOVAL_SINGLE {
|
||||||
|
ext.args = [
|
||||||
|
// trimming options
|
||||||
|
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||||
|
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
||||||
|
// filtering options
|
||||||
|
"--minlength ${params.shortread_clipmerge_minlength}"
|
||||||
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/adapterremoval" },
|
||||||
|
mode: 'copy',
|
||||||
|
pattern: '*.fastq.gz'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: ADAPTERREMOVAL_PAIRED {
|
||||||
|
ext.args = [
|
||||||
|
// collapsing options
|
||||||
|
params.shortread_clipmerge_mergepairs ? "--collapse" : "",
|
||||||
|
// trimming options
|
||||||
|
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||||
|
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
|
||||||
|
params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
|
||||||
|
// filtering options
|
||||||
|
"--minlength ${params.shortread_clipmerge_minlength}"
|
||||||
|
].join(' ').trim()
|
||||||
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/adapterremoval" },
|
||||||
|
mode: 'copy',
|
||||||
|
pattern: '*.fastq.gz'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
withName: PORECHOP {
|
withName: PORECHOP {
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
|
@ -75,16 +142,6 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: FASTQC_POST {
|
|
||||||
ext.args = '--quiet'
|
|
||||||
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
|
||||||
publishDir = [
|
|
||||||
path: { "${params.outdir}/fastqc/processed" },
|
|
||||||
mode: 'copy',
|
|
||||||
pattern: '*.html'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
withName: CAT_FASTQ {
|
withName: CAT_FASTQ {
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/prepared_sequences" },
|
path: { "${params.outdir}/prepared_sequences" },
|
||||||
|
@ -94,23 +151,32 @@ process {
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: MALT_RUN {
|
withName: MALT_RUN {
|
||||||
|
ext.args = { "${meta.db_params}" }
|
||||||
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||||
mode: 'copy',
|
mode: 'copy',
|
||||||
pattern: '*.{rma6,tab,text,sam,log}'
|
pattern: '*.{rma6,tab,text,sam,log}'
|
||||||
]
|
]
|
||||||
ext.args = { "${meta.db_params}" }
|
|
||||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: KRAKEN2_KRAKEN2 {
|
withName: KRAKEN2_KRAKEN2 {
|
||||||
|
ext.args = { "${meta.db_params}" }
|
||||||
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/kraken2/${meta.db_name}" },
|
path: { "${params.outdir}/kraken2/${meta.db_name}" },
|
||||||
mode: 'copy',
|
mode: 'copy',
|
||||||
pattern: '*.{fastq.gz,txt}'
|
pattern: '*.{fastq.gz,txt}'
|
||||||
]
|
]
|
||||||
ext.args = { "${meta.db_params}" }
|
}
|
||||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
|
||||||
|
withName: METAPHLAN3 {
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.{biom,txt}'
|
||||||
|
]
|
||||||
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||||
|
@ -128,7 +194,7 @@ process {
|
||||||
pattern: '*.{fastq.gz,txt}'
|
pattern: '*.{fastq.gz,txt}'
|
||||||
]
|
]
|
||||||
ext.args = { "${meta.db_params}" }
|
ext.args = { "${meta.db_params}" }
|
||||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,10 +23,10 @@ params {
|
||||||
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
|
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
|
||||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||||
outdir = "./results"
|
|
||||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||||
run_kraken2 = true
|
run_kraken2 = true
|
||||||
run_malt = true
|
run_malt = true
|
||||||
|
run_metaphlan3 = true
|
||||||
shortread_clipmerge = true
|
shortread_clipmerge = true
|
||||||
run_centrifuge = true
|
run_centrifuge = true
|
||||||
|
|
||||||
|
|
12
modules.json
12
modules.json
|
@ -3,6 +3,9 @@
|
||||||
"homePage": "https://github.com/nf-core/taxprofiler",
|
"homePage": "https://github.com/nf-core/taxprofiler",
|
||||||
"repos": {
|
"repos": {
|
||||||
"nf-core/modules": {
|
"nf-core/modules": {
|
||||||
|
"adapterremoval": {
|
||||||
|
"git_sha": "f0800157544a82ae222931764483331a81812012"
|
||||||
|
},
|
||||||
"cat/fastq": {
|
"cat/fastq": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
@ -21,17 +24,20 @@
|
||||||
"malt/run": {
|
"malt/run": {
|
||||||
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
||||||
},
|
},
|
||||||
"multiqc": {
|
"metaphlan3": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
"untar": {
|
"multiqc": {
|
||||||
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
"porechop": {
|
"porechop": {
|
||||||
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
||||||
},
|
},
|
||||||
"centrifuge": {
|
"centrifuge": {
|
||||||
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
|
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
|
||||||
|
},
|
||||||
|
"untar": {
|
||||||
|
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
31
modules/local/ensure_fastq_extension.nf
Normal file
31
modules/local/ensure_fastq_extension.nf
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
// Creates symlinks to the input read file(s) whose names end in `.fastq.gz`,
// so that downstream steps matching on that suffix pick them up.
//
// Input : tuple of sample `meta` map and `reads` (one file when
//         meta.single_end is truthy, otherwise indexed as reads[0]/reads[1]).
// Output: the same `meta` plus every `*.fastq.gz` present in the task directory,
//         emitted on the `reads` channel.
process ENSURE_FASTQ_EXTENSION {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
        'biocontainers/biocontainers:v1.2.0_cv2' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads

    script:
    // NOTE(review): `baseName` presumably strips only the final extension, so
    // `x.fq.gz` would be linked as `x.fq.fastq.gz` -- confirm this is the intended naming.
    if (!meta.single_end) {
        // Paired-end: link each mate separately under its own derived name.
        def link_r1 = "${reads[0].baseName}.fastq.gz"
        def link_r2 = "${reads[1].baseName}.fastq.gz"
        """
        ln -s '${reads[0]}' '${link_r1}'
        ln -s '${reads[1]}' '${link_r2}'
        """
    } else {
        // Single-end: exactly one file to link.
        def link_name = "${reads.baseName}.fastq.gz"
        """
        ln -s '${reads}' '${link_name}'
        """
    }
}
|
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
70
modules/nf-core/modules/adapterremoval/main.nf
generated
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
// Runs AdapterRemoval on single-end or paired-end reads.
//
// Inputs:
//   meta        - sample map; `meta.id` is the default output prefix and
//                 `meta.single_end` selects the single-/paired-end command.
//   reads       - one FASTQ (single-end) or two FASTQs indexed [0]/[1] (paired-end).
//   adapterlist - optional adapter table; when supplied it is passed to the tool
//                 as `--adapter-list <file>`, otherwise nothing is added.
//
// Outputs: all read outputs are optional because the set of files AdapterRemoval
// writes depends on the mode and on the options given through `task.ext.args`;
// the settings log and `versions.yml` are always emitted.
process ADAPTERREMOVAL {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"

    input:
    tuple val(meta), path(reads)
    path(adapterlist)

    output:
    tuple val(meta), path("${prefix}.truncated.gz")            , optional: true, emit: singles_truncated
    tuple val(meta), path("${prefix}.discarded.gz")            , optional: true, emit: discarded
    tuple val(meta), path("${prefix}.pair1.truncated.gz")      , optional: true, emit: pair1_truncated
    tuple val(meta), path("${prefix}.pair2.truncated.gz")      , optional: true, emit: pair2_truncated
    tuple val(meta), path("${prefix}.collapsed.gz")            , optional: true, emit: collapsed
    tuple val(meta), path("${prefix}.collapsed.truncated.gz")  , optional: true, emit: collapsed_truncated
    tuple val(meta), path("${prefix}.paired.gz")               , optional: true, emit: paired_interleaved
    tuple val(meta), path('*.log')                             , emit: log
    path "versions.yml"                                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Only add the flag when an adapter list was actually supplied.
    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
    // Deliberately not declared with `def`: the output block above interpolates `prefix`.
    prefix = task.ext.prefix ?: "${meta.id}"

    if (meta.single_end) {
        // FIX: interpolate `$list` (flag + file) rather than the bare `$adapterlist`
        // path, which reached the tool as a stray positional argument.
        """
        AdapterRemoval  \\
            --file1 $reads \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    } else {
        // Paired-end: same invocation with the second mate supplied via --file2.
        """
        AdapterRemoval  \\
            --file1 ${reads[0]} \\
            --file2 ${reads[1]} \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    }

}
|
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
90
modules/nf-core/modules/adapterremoval/meta.yml
generated
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
name: adapterremoval
|
||||||
|
description: Trim sequencing adapters and collapse overlapping reads
|
||||||
|
keywords:
|
||||||
|
- trimming
|
||||||
|
- adapters
|
||||||
|
- merging
|
||||||
|
- fastq
|
||||||
|
tools:
|
||||||
|
- adapterremoval:
|
||||||
|
description: The AdapterRemoval v2 tool for merging and clipping reads.
|
||||||
|
homepage: https://github.com/MikkelSchubert/adapterremoval
|
||||||
|
documentation: https://adapterremoval.readthedocs.io
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
|
||||||
|
- adapterlist:
|
||||||
|
type: file
|
||||||
|
description: Optional text file containing list of adapters to look for for removal
|
||||||
|
with one adapter per line. Otherwise will look for default adapters (see
|
||||||
|
AdapterRemoval man page), or can be modified to remove user-specified
|
||||||
|
adapters via ext.args.
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- singles_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed FastQ files of either single-end reads, or singleton
|
||||||
|
'orphaned' reads from merging of paired-end data (i.e., one of the pair
|
||||||
|
was lost due to filtering thresholds).
|
||||||
|
pattern: "*.truncated.gz"
|
||||||
|
- discarded:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed FastQ files of reads that did not pass filtering
|
||||||
|
thresholds.
|
||||||
|
pattern: "*.discarded.gz"
|
||||||
|
- pair1_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
|
||||||
|
with their respective R2 pair due to long templates. The respective pair
|
||||||
|
is stored in 'pair2_truncated'.
|
||||||
|
pattern: "*.pair1.truncated.gz"
|
||||||
|
- pair2_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
|
||||||
|
with their respective R1 pair due to long templates. The respective pair
|
||||||
|
is stored in 'pair1_truncated'.
|
||||||
|
pattern: "*.pair2.truncated.gz"
|
||||||
|
- collapsed:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
|
respective R1 pair but were not trimmed.
|
||||||
|
pattern: "*.collapsed.gz"
|
||||||
|
- collapsed_truncated:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Collapsed FastQ of paired-end reads that successfully merged with their
|
||||||
|
respective R1 pair and were trimmed of adapter due to sufficient overlap.
|
||||||
|
pattern: "*.collapsed.truncated.gz"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: AdapterRemoval log file
|
||||||
|
pattern: "*.log"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@maxibor"
|
||||||
|
- "@jfy133"
|
4
modules/nf-core/modules/centrifuge/main.nf
generated
4
modules/nf-core/modules/centrifuge/main.nf
generated
|
@ -10,6 +10,7 @@ process CENTRIFUGE {
|
||||||
input:
|
input:
|
||||||
tuple val(meta), path(reads)
|
tuple val(meta), path(reads)
|
||||||
path db
|
path db
|
||||||
|
val db_name
|
||||||
val save_unaligned
|
val save_unaligned
|
||||||
val save_aligned
|
val save_aligned
|
||||||
val sam_format
|
val sam_format
|
||||||
|
@ -42,9 +43,8 @@ process CENTRIFUGE {
|
||||||
}
|
}
|
||||||
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
|
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
|
||||||
"""
|
"""
|
||||||
tar -xf $db
|
|
||||||
centrifuge \\
|
centrifuge \\
|
||||||
-x $db_name \\
|
-x ${db}/${db_name} \\
|
||||||
-p $task.cpus \\
|
-p $task.cpus \\
|
||||||
$paired \\
|
$paired \\
|
||||||
--report-file ${prefix}.report.txt \\
|
--report-file ${prefix}.report.txt \\
|
||||||
|
|
3
modules/nf-core/modules/centrifuge/meta.yml
generated
3
modules/nf-core/modules/centrifuge/meta.yml
generated
|
@ -27,6 +27,9 @@ input:
|
||||||
type: directory
|
type: directory
|
||||||
description: Centrifuge database in .tar.gz format
|
description: Centrifuge database in .tar.gz format
|
||||||
pattern: "*.tar.gz"
|
pattern: "*.tar.gz"
|
||||||
|
- db_name:
|
||||||
|
type: string
|
||||||
|
description: Centrifuge database filenames without the suffix ".cf"
|
||||||
- save_unaligned:
|
- save_unaligned:
|
||||||
type: value
|
type: value
|
||||||
description: If true unmapped fastq files are saved
|
description: If true unmapped fastq files are saved
|
||||||
|
|
45
modules/nf-core/modules/metaphlan3/main.nf
generated
Normal file
45
modules/nf-core/modules/metaphlan3/main.nf
generated
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
// Runs MetaPhlAn on one sample and writes a profile table and a BIOM file.
//
// Inputs:
//   meta         - sample map; `meta.id` is the default output prefix and
//                  `meta.single_end` decides whether two FASTQs are comma-joined.
//   input        - file(s) to profile; the `--input_type` value is chosen from the file name.
//   metaphlan_db - path handed to `--bowtie2db`.
//
// Outputs: `<prefix>_profile.txt`, `<prefix>.biom`, optionally
// `<prefix>.bowtie2out.txt` (only requested for fastq/fasta input), and `versions.yml`.
process METAPHLAN3 {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
        'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"

    input:
    tuple val(meta), path(input)
    path metaphlan_db

    output:
    tuple val(meta), path("*_profile.txt")   ,                emit: profile
    tuple val(meta), path("*.biom")          ,                emit: biom
    tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out
    path "versions.yml"                      ,                emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def extra_args = task.ext.args ?: ''
    def out_prefix = task.ext.prefix ?: "${meta.id}"
    def input_name = "$input".toString()

    // Pick the input format from the file name; checks run in this order and
    // anything unmatched falls through to SAM.
    def detected_format
    if (input_name.endsWith(".fastq.gz")) {
        detected_format = "fastq"
    } else if (input_name.contains(".fasta")) {
        detected_format = "fasta"
    } else if (input_name.endsWith(".bowtie2out.txt")) {
        detected_format = "bowtie2out"
    } else {
        detected_format = "sam"
    }
    def type_flag = "--input_type ${detected_format}"

    // Paired-end FASTQ is passed as "R1,R2"; every other case uses the input as rendered.
    def reads_arg
    if (detected_format == "fastq" && !meta.single_end) {
        reads_arg = "${input[0]},${input[1]}"
    } else {
        reads_arg = "$input"
    }

    // Only request the intermediate bowtie2 output for fastq/fasta input.
    def bt2_flag = (detected_format in ["bowtie2out", "sam"]) ? '' : "--bowtie2out ${out_prefix}.bowtie2out.txt"

    """
    metaphlan \\
        --nproc $task.cpus \\
        $type_flag \\
        $reads_arg \\
        $extra_args \\
        $bt2_flag \\
        --bowtie2db ${metaphlan_db} \\
        --biom ${out_prefix}.biom \\
        --output_file ${out_prefix}_profile.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
    END_VERSIONS
    """
}
|
52
modules/nf-core/modules/metaphlan3/meta.yml
generated
Normal file
52
modules/nf-core/modules/metaphlan3/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
name: metaphlan3
|
||||||
|
description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data.
|
||||||
|
keywords:
|
||||||
|
- metagenomics
|
||||||
|
- classification
|
||||||
|
- fastq
|
||||||
|
- bam
|
||||||
|
- fasta
|
||||||
|
tools:
|
||||||
|
- metaphlan3:
|
||||||
|
description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
|
||||||
|
homepage: https://huttenhower.sph.harvard.edu/metaphlan/
|
||||||
|
documentation: https://github.com/biobakery/MetaPhlAn
|
||||||
|
doi: "10.7554/eLife.65088"
|
||||||
|
licence: ["MIT License"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- input:
|
||||||
|
type: file
|
||||||
|
description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
|
||||||
|
pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- profile:
|
||||||
|
type: file
|
||||||
|
description: Tab-separated output file of the predicted taxon relative abundances
|
||||||
|
pattern: "*.{txt}"
|
||||||
|
- biom:
|
||||||
|
type: file
|
||||||
|
description: General-use format for representing biological sample by observation contingency tables
|
||||||
|
pattern: "*.{biom}"
|
||||||
|
- bowtie2out:
|
||||||
|
type: file
|
||||||
|
description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPHlAn marker database ( not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3 )
|
||||||
|
pattern: "*.{bowtie2out.txt}"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@MGordon09"
|
|
@ -55,9 +55,15 @@ params {
|
||||||
databases = null
|
databases = null
|
||||||
|
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
shortread_clipmerge = false
|
shortread_clipmerge = false
|
||||||
shortread_excludeunmerged = true
|
shortread_clipmerge_tool = 'fastp'
|
||||||
longread_clip = false
|
shortread_clipmerge_skipadaptertrim = false
|
||||||
|
shortread_clipmerge_mergepairs = false
|
||||||
|
shortread_clipmerge_excludeunmerged = false
|
||||||
|
shortread_clipmerge_adapter1 = null
|
||||||
|
shortread_clipmerge_adapter2 = null
|
||||||
|
shortread_clipmerge_minlength = 15
|
||||||
|
longread_clip = false
|
||||||
|
|
||||||
// MALT
|
// MALT
|
||||||
run_malt = false
|
run_malt = false
|
||||||
|
@ -68,9 +74,12 @@ params {
|
||||||
|
|
||||||
// centrifuge
|
// centrifuge
|
||||||
run_centrifuge = false
|
run_centrifuge = false
|
||||||
|
centrifuge_db_name = false
|
||||||
centrifuge_save_unaligned = false
|
centrifuge_save_unaligned = false
|
||||||
centrifuge_save_aligned = false
|
centrifuge_save_aligned = false
|
||||||
centrifuge_sam_format = false
|
centrifuge_sam_format = false
|
||||||
|
// metaphlan3
|
||||||
|
run_metaphlan3 = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load base.config by default for all pipelines
|
// Load base.config by default for all pipelines
|
||||||
|
@ -155,7 +164,7 @@ if (!params.igenomes_ignore) {
|
||||||
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
|
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
|
||||||
|
|
||||||
env {
|
env {
|
||||||
PYTHONNOUSERSITE = 1
|
PYTHONNOUSERSITE = '1'
|
||||||
R_PROFILE_USER = "/.Rprofile"
|
R_PROFILE_USER = "/.Rprofile"
|
||||||
R_ENVIRON_USER = "/.Renviron"
|
R_ENVIRON_USER = "/.Renviron"
|
||||||
JULIA_DEPOT_PATH = "/usr/local/share/julia"
|
JULIA_DEPOT_PATH = "/usr/local/share/julia"
|
||||||
|
|
|
@ -10,7 +10,10 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"fa_icon": "fas fa-terminal",
|
"fa_icon": "fas fa-terminal",
|
||||||
"description": "Define where the pipeline should find input data and save output data.",
|
"description": "Define where the pipeline should find input data and save output data.",
|
||||||
"required": ["input", "outdir"],
|
"required": [
|
||||||
|
"input",
|
||||||
|
"outdir"
|
||||||
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"input": {
|
"input": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -173,7 +176,14 @@
|
||||||
"description": "Method used to save pipeline results to output directory.",
|
"description": "Method used to save pipeline results to output directory.",
|
||||||
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
|
||||||
"fa_icon": "fas fa-copy",
|
"fa_icon": "fas fa-copy",
|
||||||
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
|
"enum": [
|
||||||
|
"symlink",
|
||||||
|
"rellink",
|
||||||
|
"link",
|
||||||
|
"copy",
|
||||||
|
"copyNoFollow",
|
||||||
|
"move"
|
||||||
|
],
|
||||||
"hidden": true
|
"hidden": true
|
||||||
},
|
},
|
||||||
"email_on_fail": {
|
"email_on_fail": {
|
||||||
|
@ -265,9 +275,9 @@
|
||||||
"shortread_clipmerge": {
|
"shortread_clipmerge": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"shortread_excludeunmerged": {
|
"shortread_clipmerge_excludeunmerged": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"default": true
|
"default": false
|
||||||
},
|
},
|
||||||
"longread_clip": {
|
"longread_clip": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
@ -293,6 +303,40 @@
|
||||||
},
|
},
|
||||||
"centrifuge_sam_format": {
|
"centrifuge_sam_format": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"run_metaphlan3": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_tool": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "fastp",
|
||||||
|
"enum": [
|
||||||
|
"fastp",
|
||||||
|
"adapterremoval"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_skipadaptertrim": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_mergepairs": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_adapter1": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "None"
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_adapter2": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "None"
|
||||||
|
},
|
||||||
|
"shortread_clipmerge_minlength": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 15
|
||||||
|
},
|
||||||
|
"centrifuge_db_name": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "false"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -12,16 +12,17 @@ workflow DB_CHECK {
|
||||||
main:
|
main:
|
||||||
|
|
||||||
// TODO: make database sheet check
|
// TODO: make database sheet check
|
||||||
|
// Checks:
|
||||||
|
// 1) no duplicates,
|
||||||
|
// 2) args do not have quotes, e.g. just `,,` and NOT `,"",`
|
||||||
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
|
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
|
||||||
.csv
|
.csv
|
||||||
.splitCsv ( header:true, sep:',' )
|
.splitCsv ( header:true, sep:',' )
|
||||||
.dump(tag: "db_split_csv_out")
|
|
||||||
.map { create_db_channels(it) }
|
.map { create_db_channels(it) }
|
||||||
.dump(tag: "db_channel_prepped")
|
|
||||||
|
|
||||||
ch_dbs_for_untar = parsed_samplesheet
|
ch_dbs_for_untar = parsed_samplesheet
|
||||||
.branch {
|
.branch {
|
||||||
untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool'] != "centrifuge"
|
untar: it[1].toString().endsWith(".tar.gz")
|
||||||
skip: true
|
skip: true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,6 @@ workflow INPUT_CHECK {
|
||||||
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
|
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
|
||||||
.csv
|
.csv
|
||||||
.splitCsv ( header:true, sep:',' )
|
.splitCsv ( header:true, sep:',' )
|
||||||
.dump(tag: "input_split_csv_out")
|
|
||||||
.branch {
|
.branch {
|
||||||
fasta: it['fasta'] != ''
|
fasta: it['fasta'] != ''
|
||||||
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
|
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
|
||||||
|
@ -21,23 +20,20 @@ workflow INPUT_CHECK {
|
||||||
|
|
||||||
parsed_samplesheet.fastq
|
parsed_samplesheet.fastq
|
||||||
.map { create_fastq_channel(it) }
|
.map { create_fastq_channel(it) }
|
||||||
.dump(tag: "fastq_channel_init")
|
|
||||||
.set { fastq }
|
.set { fastq }
|
||||||
|
|
||||||
parsed_samplesheet.nanopore
|
parsed_samplesheet.nanopore
|
||||||
.map { create_fastq_channel(it) }
|
.map { create_fastq_channel(it) }
|
||||||
.dump(tag: "fastq_nanopore_channel_init")
|
|
||||||
.set { nanopore }
|
.set { nanopore }
|
||||||
|
|
||||||
parsed_samplesheet.fasta
|
parsed_samplesheet.fasta
|
||||||
.map { create_fasta_channel(it) }
|
.map { create_fasta_channel(it) }
|
||||||
.dump(tag: "fasta_channel_init")
|
|
||||||
.set { fasta }
|
.set { fasta }
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
fastq // channel: [ val(meta), [ reads ] ]
|
fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ]
|
||||||
nanopore // channel: [ val(meta), [ reads ] ]
|
nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ]
|
||||||
fasta // channel: [ val(meta), fasta ]
|
fasta = fasta ?: [] // channel: [ val(meta), fasta ]
|
||||||
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
|
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,9 +68,7 @@ def create_fastq_channel(LinkedHashMap row) {
|
||||||
|
|
||||||
}
|
}
|
||||||
return fastq_meta
|
return fastq_meta
|
||||||
}
|
}// Function to get list of [ meta, fasta ]
|
||||||
|
|
||||||
// Function to get list of [ meta, fasta ]
|
|
||||||
def create_fasta_channel(LinkedHashMap row) {
|
def create_fasta_channel(LinkedHashMap row) {
|
||||||
def meta = [:]
|
def meta = [:]
|
||||||
meta.id = row.sample
|
meta.id = row.sample
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
|
/*
|
||||||
|
Process long raw reads with porechop
|
||||||
|
*/
|
||||||
|
|
||||||
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||||
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
||||||
|
|
||||||
workflow LONGREAD_PREPROCESSING {
|
workflow LONGREAD_PREPROCESSING {
|
||||||
take:
|
take:
|
||||||
|
@ -13,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
PORECHOP ( reads )
|
PORECHOP ( reads )
|
||||||
|
|
||||||
ch_processed_reads = PORECHOP.out.reads
|
ch_processed_reads = PORECHOP.out.reads
|
||||||
.dump(tag: "pre_fastqc_check")
|
|
||||||
.map {
|
.map {
|
||||||
meta, reads ->
|
meta, reads ->
|
||||||
def meta_new = meta.clone()
|
def meta_new = meta.clone()
|
||||||
|
@ -21,9 +23,9 @@ workflow LONGREAD_PREPROCESSING {
|
||||||
[ meta_new, reads ]
|
[ meta_new, reads ]
|
||||||
}
|
}
|
||||||
|
|
||||||
FASTQC_POST ( PORECHOP.out.reads )
|
FASTQC_PROCESSED ( PORECHOP.out.reads )
|
||||||
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
|
||||||
|
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
|
|
129
subworkflows/local/shortread_adapterremoval.nf
Normal file
129
subworkflows/local/shortread_adapterremoval.nf
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
/*
|
||||||
|
Process short raw reads with AdapterRemoval
|
||||||
|
*/
|
||||||
|
|
||||||
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
|
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||||
|
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
||||||
|
include {
|
||||||
|
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
|
||||||
|
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
|
||||||
|
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
|
||||||
|
} from '../../modules/local/ensure_fastq_extension'
|
||||||
|
|
||||||
|
workflow SHORTREAD_ADAPTERREMOVAL {

    take:
    reads // channel: [ val(meta), [ reads ] ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // Route each sample to the single-end or paired-end alias of the module,
    // based on the `single_end` flag carried in its meta map.
    ch_input_for_adapterremoval = reads
        .branch {
            single: it[0].single_end
            paired: !it[0].single_end
        }

    // NOTE(review): the second argument is passed as an empty list; presumably
    // this is the module's optional adapter list -- confirm against the module.
    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )

    /*
     * AdapterRemoval exposes every output file type on its own channel, so the
     * pieces that belong together have to be recombined manually below.
     *
     * In every closure the meta map is cloned before `single_end` is changed:
     * the same map object may be referenced by other channels, and mutating it
     * in place would silently change what those channels see.
     */

    if ( params.shortread_clipmerge_mergepairs ) {

        // Merged (collapsed) pairs are always kept when merging is requested.
        ch_paired_for_concat = Channel.empty().mix(
            ADAPTERREMOVAL_PAIRED.out.collapsed,
            ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
        )

        // Unless explicitly excluded, reads that could not be merged are
        // concatenated into the same per-sample file as the merged reads.
        if ( !params.shortread_clipmerge_excludeunmerged ) {
            ch_paired_for_concat = ch_paired_for_concat.mix(
                ADAPTERREMOVAL_PAIRED.out.singles_truncated,
                ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
                ADAPTERREMOVAL_PAIRED.out.pair2_truncated
            )
        }

        // After merging, the library is treated as single-end downstream.
        ENSURE_FASTQ_EXTENSION1(
            ch_paired_for_concat
                .map { meta, reads ->
                    def meta_new = meta.clone()
                    meta_new.single_end = true
                    [ meta_new, reads ]
                }
        )

        // One concatenated FASTQ per sample (grouped on the meta map).
        CAT_FASTQ(
            ENSURE_FASTQ_EXTENSION1.out.reads
                .groupTuple()
        )

        ENSURE_FASTQ_EXTENSION2( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

        ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
            .mix( ENSURE_FASTQ_EXTENSION2.out.reads )

    } else {

        // No merging: fix the extension of each mate separately, then pair
        // them up again. The mates are temporarily flagged single-end while
        // they pass through ENSURE_FASTQ_EXTENSION one file at a time.
        ENSURE_FASTQ_EXTENSION1(
            ADAPTERREMOVAL_PAIRED.out.pair1_truncated
                .map { meta, reads ->
                    def meta_new = meta.clone()
                    meta_new.single_end = true
                    [ meta_new, reads ]
                }
        )

        ENSURE_FASTQ_EXTENSION2(
            ADAPTERREMOVAL_PAIRED.out.pair2_truncated
                .map { meta, reads ->
                    def meta_new = meta.clone()
                    meta_new.single_end = true
                    [ meta_new, reads ]
                }
        )

        ENSURE_FASTQ_EXTENSION3( ADAPTERREMOVAL_SINGLE.out.singles_truncated )

        // Re-join mate 1 and mate 2 on their (equal) meta maps and restore the
        // paired-end flag on the recombined tuple.
        ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
            .join( ENSURE_FASTQ_EXTENSION2.out.reads )
            .groupTuple()
            .map { meta, pair1, pair2 ->
                def meta_new = meta.clone()
                meta_new.single_end = false
                [ meta_new, [ pair1, pair2 ].flatten() ]
            }
            .mix( ENSURE_FASTQ_EXTENSION3.out.reads )

    }

    ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
    ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
    ch_multiqc_files = ch_multiqc_files.mix(
        ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
        ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
    )

    emit:
    reads    = ch_adapterremoval_reads_prepped  // channel: [ val(meta), [ reads ] ]
    versions = ch_versions                      // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}
|
||||||
|
|
55
subworkflows/local/shortread_fastp.nf
Normal file
55
subworkflows/local/shortread_fastp.nf
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
Process short raw reads with FastP
|
||||||
|
*/
|
||||||
|
|
||||||
|
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
|
||||||
|
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
|
||||||
|
|
||||||
|
workflow SHORTREAD_FASTP {

    take:
    reads // channel: [ val(meta), [ reads ] ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // Route each sample to the single-end or paired-end alias of FASTP.
    // The comparison is against real booleans, so `single_end` must be
    // true/false (not 1/0) for a sample to land in either branch.
    ch_input_for_fastp = reads
        .branch {
            single: it[0]['single_end'] == true
            paired: it[0]['single_end'] == false
        }

    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
    // Last parameter here turns on merging of PE data
    FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )

    if ( params.shortread_clipmerge_mergepairs ) {

        // Merged pairs are a single file per sample, so flag them single-end.
        // The flag is a boolean (previously the integer 1), which keeps it
        // consistent with the `== true` / `== false` tests above and with the
        // boolean flag set by the AdapterRemoval subworkflow. The meta map is
        // cloned so the map shared with other channels is left untouched.
        ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
            .map { meta, reads ->
                def meta_new = meta.clone()
                meta_new['single_end'] = true
                [ meta_new, reads ]
            }

        // NOTE(review): only `reads_merged` is forwarded here, so pairs that
        // could not be merged are not passed on regardless of
        // `params.shortread_clipmerge_excludeunmerged` -- confirm this is intended.
        ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads )

    } else {

        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
            .mix( FASTP_SINGLE.out.reads )

    }

    ch_versions = ch_versions.mix( FASTP_SINGLE.out.versions.first() )
    ch_versions = ch_versions.mix( FASTP_PAIRED.out.versions.first() )

    ch_processed_reads = ch_fastp_reads_prepped

    // fastp JSON reports are collected for MultiQC.
    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )

    emit:
    reads    = ch_processed_reads  // channel: [ val(meta), [ reads ] ]
    versions = ch_versions         // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}
|
||||||
|
|
|
@ -3,67 +3,33 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
|
include { SHORTREAD_FASTP } from './shortread_fastp'
|
||||||
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
|
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
|
||||||
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
|
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
|
||||||
|
|
||||||
workflow SHORTREAD_PREPROCESSING {
|
workflow SHORTREAD_PREPROCESSING {
|
||||||
take:
|
take:
|
||||||
reads // file: /path/to/samplesheet.csv
|
reads // file: /path/to/samplesheet.csv
|
||||||
|
|
||||||
main:
|
main:
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
ch_multiqc_files = Channel.empty()
|
ch_multiqc_files = Channel.empty()
|
||||||
|
|
||||||
//
|
|
||||||
// STEP: Read clipping and merging
|
|
||||||
//
|
|
||||||
// TODO give option to clip only and retain pairs
|
|
||||||
// TODO give option to retain singletons (probably fastp option likely)
|
|
||||||
// TODO move to subworkflow
|
|
||||||
|
|
||||||
|
|
||||||
if ( params.shortread_clipmerge ) {
|
|
||||||
|
|
||||||
ch_input_for_fastp = reads
|
|
||||||
.dump(tag: "pre-fastp_branch")
|
|
||||||
.branch{
|
|
||||||
single: it[0]['single_end'] == true
|
|
||||||
paired: it[0]['single_end'] == false
|
|
||||||
}
|
|
||||||
|
|
||||||
ch_input_for_fastp.single.dump(tag: "input_fastp_single")
|
|
||||||
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
|
|
||||||
|
|
||||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
|
||||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, true )
|
|
||||||
|
|
||||||
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
|
|
||||||
.mix( FASTP_SINGLE.out.reads )
|
|
||||||
.map {
|
|
||||||
meta, reads ->
|
|
||||||
def meta_new = meta.clone()
|
|
||||||
meta_new['single_end'] = 1
|
|
||||||
[ meta_new, reads ]
|
|
||||||
}
|
|
||||||
|
|
||||||
FASTQC_POST ( ch_fastp_reads_prepped )
|
|
||||||
|
|
||||||
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
|
|
||||||
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
|
|
||||||
|
|
||||||
ch_processed_reads = ch_fastp_reads_prepped
|
|
||||||
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
|
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
|
|
||||||
|
|
||||||
ch_multiqc_files.dump(tag: "preprocessing_mqc_final")
|
|
||||||
|
|
||||||
|
if ( params.shortread_clipmerge_tool == "fastp" ) {
|
||||||
|
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||||
|
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||||
|
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
|
||||||
|
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||||
|
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||||
} else {
|
} else {
|
||||||
ch_processed_reads = reads
|
ch_processed_reads = reads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FASTQC_PROCESSED ( ch_processed_reads )
|
||||||
|
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||||
|
|
|
@ -17,6 +17,8 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
||||||
// Check mandatory parameters
|
// Check mandatory parameters
|
||||||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||||
|
// The warning text is about *uncollapsed* (unmerged) pairs, so it must fire when
// pair merging is NOT enabled while MALT is requested.
if (!params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
// Excluding unmerged reads only makes sense when merging is switched on; the
// message names the same action as the parameter being checked.
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -36,11 +38,11 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
|
||||||
//
|
//
|
||||||
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
|
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
|
||||||
//
|
//
|
||||||
include { INPUT_CHECK } from '../subworkflows/local/input_check'
|
include { INPUT_CHECK } from '../subworkflows/local/input_check'
|
||||||
|
|
||||||
include { DB_CHECK } from '../subworkflows/local/db_check'
|
include { DB_CHECK } from '../subworkflows/local/db_check'
|
||||||
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
|
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
|
||||||
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
|
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -59,6 +61,7 @@ include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fas
|
||||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
|
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
|
||||||
|
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
||||||
|
|
||||||
/*
|
/*
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -73,9 +76,9 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
|
|
||||||
//
|
/*
|
||||||
// SUBWORKFLOW: Read in samplesheet, validate and stage input files
|
SUBWORKFLOW: Read in samplesheet, validate and stage input files
|
||||||
//
|
*/
|
||||||
INPUT_CHECK (
|
INPUT_CHECK (
|
||||||
ch_input
|
ch_input
|
||||||
)
|
)
|
||||||
|
@ -85,22 +88,24 @@ workflow TAXPROFILER {
|
||||||
ch_databases
|
ch_databases
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
/*
|
||||||
// MODULE: Run FastQC
|
MODULE: Run FastQC
|
||||||
//
|
*/
|
||||||
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
|
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
|
||||||
|
|
||||||
FASTQC (
|
FASTQC (
|
||||||
ch_input_for_fastqc
|
ch_input_for_fastqc
|
||||||
)
|
)
|
||||||
|
|
||||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||||
|
|
||||||
CUSTOM_DUMPSOFTWAREVERSIONS (
|
CUSTOM_DUMPSOFTWAREVERSIONS (
|
||||||
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
/*
|
||||||
// PERFORM PREPROCESSING
|
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||||
//
|
*/
|
||||||
if ( params.shortread_clipmerge ) {
|
if ( params.shortread_clipmerge ) {
|
||||||
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
||||||
} else {
|
} else {
|
||||||
|
@ -115,54 +120,31 @@ workflow TAXPROFILER {
|
||||||
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
|
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
/*
|
||||||
// PERFORM SHORT READ RUN MERGING
|
COMBINE READS WITH POSSIBLE DATABASES
|
||||||
// TODO: Check not necessary for long reads too?
|
*/
|
||||||
//
|
|
||||||
ch_processed_for_combine = ch_shortreads_preprocessed
|
|
||||||
.dump(tag: "prep_for_combine_grouping")
|
|
||||||
.map {
|
|
||||||
meta, reads ->
|
|
||||||
def meta_new = meta.clone()
|
|
||||||
meta_new['run_accession'] = 'combined'
|
|
||||||
[ meta_new, reads ]
|
|
||||||
}
|
|
||||||
.groupTuple ( by: 0 )
|
|
||||||
.branch{
|
|
||||||
combine: it[1].size() >= 2
|
|
||||||
skip: it[1].size() < 2
|
|
||||||
}
|
|
||||||
|
|
||||||
CAT_FASTQ ( ch_processed_for_combine.combine )
|
|
||||||
|
|
||||||
ch_reads_for_profiling = ch_processed_for_combine.skip
|
|
||||||
.dump(tag: "skip_combine")
|
|
||||||
.mix( CAT_FASTQ.out.reads )
|
|
||||||
.dump(tag: "files_for_profiling")
|
|
||||||
|
|
||||||
//
|
|
||||||
// COMBINE READS WITH POSSIBLE DATABASES
|
|
||||||
//
|
|
||||||
|
|
||||||
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
||||||
ch_input_for_profiling = ch_reads_for_profiling
|
ch_input_for_profiling = ch_shortreads_preprocessed
|
||||||
.mix( ch_longreads_preprocessed )
|
.mix( ch_longreads_preprocessed )
|
||||||
.combine(DB_CHECK.out.dbs)
|
.combine(DB_CHECK.out.dbs)
|
||||||
.dump(tag: "reads_plus_db")
|
|
||||||
.branch {
|
.branch {
|
||||||
malt: it[2]['tool'] == 'malt'
|
malt: it[2]['tool'] == 'malt'
|
||||||
kraken2: it[2]['tool'] == 'kraken2'
|
kraken2: it[2]['tool'] == 'kraken2'
|
||||||
|
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
||||||
centrifuge: it[2]['tool'] == 'centrifuge'
|
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||||
unknown: true
|
unknown: true
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
/*
|
||||||
// PREPARE PROFILER INPUT CHANNELS
|
PREPARE PROFILER INPUT CHANNELS
|
||||||
//
|
*/
|
||||||
|
|
||||||
// We groupTuple to have all samples in one channel for MALT as database
|
// We groupTuple to have all samples in one channel for MALT as database
|
||||||
// loading takes a long time, so we only want to run it once per database
|
// loading takes a long time, so we only want to run it once per database
|
||||||
|
// TODO document somewhere we only accept illumina short reads for MALT?
|
||||||
ch_input_for_malt = ch_input_for_profiling.malt
|
ch_input_for_malt = ch_input_for_profiling.malt
|
||||||
|
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
|
||||||
.map {
|
.map {
|
||||||
it ->
|
it ->
|
||||||
def temp_meta = [ id: it[2]['db_name']] + it[2]
|
def temp_meta = [ id: it[2]['db_name']] + it[2]
|
||||||
|
@ -170,7 +152,6 @@ workflow TAXPROFILER {
|
||||||
[ temp_meta, it[1], db ]
|
[ temp_meta, it[1], db ]
|
||||||
}
|
}
|
||||||
.groupTuple(by: [0,2])
|
.groupTuple(by: [0,2])
|
||||||
.dump(tag: "input for malt")
|
|
||||||
.multiMap {
|
.multiMap {
|
||||||
it ->
|
it ->
|
||||||
reads: [ it[0], it[1].flatten() ]
|
reads: [ it[0], it[1].flatten() ]
|
||||||
|
@ -179,7 +160,6 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
// We can run Kraken2 one-by-one sample-wise
|
// We can run Kraken2 one-by-one sample-wise
|
||||||
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
||||||
.dump(tag: "input for kraken")
|
|
||||||
.multiMap {
|
.multiMap {
|
||||||
it ->
|
it ->
|
||||||
reads: [ it[0] + it[2], it[1] ]
|
reads: [ it[0] + it[2], it[1] ]
|
||||||
|
@ -198,6 +178,17 @@ workflow TAXPROFILER {
|
||||||
//
|
//
|
||||||
// RUN PROFILING
|
// RUN PROFILING
|
||||||
//
|
//
|
||||||
|
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||||
|
.dump(tag: "input_metaphlan3")
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [it[0] + it[2], it[1]]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
MODULE: RUN PROFILING
|
||||||
|
*/
|
||||||
if ( params.run_malt ) {
|
if ( params.run_malt ) {
|
||||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||||
}
|
}
|
||||||
|
@ -207,12 +198,16 @@ workflow TAXPROFILER {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_centrifuge ) {
|
if ( params.run_centrifuge ) {
|
||||||
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_db_name, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
if ( params.run_metaphlan3 ) {
|
||||||
// MODULE: MultiQC
|
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||||
//
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
MODULE: MultiQC
|
||||||
|
*/
|
||||||
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
|
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
|
||||||
ch_workflow_summary = Channel.value(workflow_summary)
|
ch_workflow_summary = Channel.value(workflow_summary)
|
||||||
|
|
||||||
|
@ -240,6 +235,7 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
// TODO MALT results overwriting per database?
|
// TODO MALT results overwriting per database?
|
||||||
// TODO Versions for Kraken/MALT not reported?
|
// TODO Versions for Kraken/MALT not reported?
|
||||||
|
// TODO create multiQC module for metaphlan
|
||||||
MULTIQC (
|
MULTIQC (
|
||||||
ch_multiqc_files.collect()
|
ch_multiqc_files.collect()
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue