Add centrifuge classification

sofstam 2022-04-04 13:51:51 +02:00
commit d897c922b2
21 changed files with 751 additions and 159 deletions

View file

@@ -28,6 +28,10 @@ jobs:
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
parameters:
- "--shortread_clipmerge_tool fastp"
- "--shortread_clipmerge_tool adapterremoval"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
@@ -42,11 +46,19 @@ jobs:
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: Show current locale
run: locale
- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
#
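With this matrix entry in place (assuming `parameters` sits under the job's `strategy.matrix`), each CI test should expand to one pipeline run per preprocessing tool, roughly:

nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --shortread_clipmerge_tool fastp
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --shortread_clipmerge_tool adapterremoval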

View file

@@ -13,9 +13,30 @@
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
- * [Porechop](https://github.com/rrwick/Porechop)
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. doi: 10.1093/bioinformatics/bty560.
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
- [Porechop](https://github.com/rrwick/Porechop)
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088.
## Software packaging/containerisation tools

View file

@@ -52,13 +52,25 @@ process {
]
}
- withName: FASTP {
- ext.prefix = { "${meta.id}_${meta.run_accession}" }
- // TODO also include option to NOT merge
withName: FASTQC_PROCESSED {
ext.args = '--quiet'
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
publishDir = [
path: { "${params.outdir}/fastqc/processed" },
mode: 'copy',
pattern: '*.html'
]
}
withName: FASTP_SINGLE {
ext.args = [
- { ${meta.single_end} } == 0 ? "-m" : '',
- params.shortread_excludeunmerged ? '' : "--include_unmerged"
// trimming options
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
// filtering options
"--length_required ${params.shortread_clipmerge_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/fastp" },
mode: 'copy',
@@ -66,6 +78,61 @@ process {
]
}
withName: FASTP_PAIRED {
ext.args = [
// collapsing options - option to retain singletons
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
// trimming options
params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
// filtering options
"--length_required ${params.shortread_clipmerge_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/fastp" },
mode: 'copy',
pattern: '*.fastq.gz'
]
}
withName: ADAPTERREMOVAL_SINGLE {
ext.args = [
// trimming options
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
// filtering options
"--minlength ${params.shortread_clipmerge_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/adapterremoval" },
mode: 'copy',
pattern: '*.fastq.gz'
]
}
withName: ADAPTERREMOVAL_PAIRED {
ext.args = [
// collapsing options
params.shortread_clipmerge_mergepairs ? "--collapse" : "",
// trimming options
params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
// filtering options
"--minlength ${params.shortread_clipmerge_minlength}"
].join(' ').trim()
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/adapterremoval" },
mode: 'copy',
pattern: '*.fastq.gz'
]
}
withName: PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
@@ -75,16 +142,6 @@ process {
]
}
- withName: FASTQC_POST {
- ext.args = '--quiet'
- ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
- publishDir = [
- path: { "${params.outdir}/fastqc/processed" },
- mode: 'copy',
- pattern: '*.html'
- ]
- }
withName: CAT_FASTQ {
publishDir = [
path: { "${params.outdir}/prepared_sequences" },
@@ -94,23 +151,32 @@ process {
]
}
withName: MALT_RUN {
ext.args = { "${meta.db_params}" }
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: 'copy',
pattern: '*.{rma6,tab,text,sam,log}'
]
- ext.args = { "${meta.db_params}" }
- ext.prefix = { "${meta.id}-${meta.db_name}" }
}
withName: KRAKEN2_KRAKEN2 {
ext.args = { "${meta.db_params}" }
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}" },
mode: 'copy',
pattern: '*.{fastq.gz,txt}'
]
- ext.args = { "${meta.db_params}" }
- ext.prefix = { "${meta.id}-${meta.db_name}" }
}
withName: METAPHLAN3 {
publishDir = [
path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{biom,txt}'
]
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@@ -128,7 +194,7 @@ process {
pattern: '*.{fastq.gz,txt}'
]
ext.args = { "${meta.db_params}" }
- ext.prefix = { "${meta.id}-${meta.db_name}" }
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
}
}
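As a sanity check on the ternaries above: with the defaults introduced in nextflow.config further down (adapter trimming on, no custom adapters, minimum length 15, unmerged reads kept), FASTP_PAIRED's ext.args should evaluate to roughly `--include_unmerged --detect_adapter_for_pe --length_required 15`, and ADAPTERREMOVAL_PAIRED's (without --shortread_clipmerge_mergepairs) to just `--minlength 15`. These strings are derived by hand from the config, not captured from a run.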

View file

@@ -23,10 +23,10 @@ params {
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
- outdir = "./results"
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
run_kraken2 = true
run_malt = true
run_metaphlan3 = true
shortread_clipmerge = true
run_centrifuge = true

View file

@@ -3,6 +3,9 @@
"homePage": "https://github.com/nf-core/taxprofiler",
"repos": {
"nf-core/modules": {
"adapterremoval": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
},
"cat/fastq": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
@@ -21,17 +24,20 @@
"malt/run": {
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
},
- "multiqc": {
"metaphlan3": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
- "untar": {
- "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
"multiqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
},
"centrifuge": {
"git_sha": "ea41a8a6f761b9993d857570e872abaae3fea555"
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
}
}
}

View file

@@ -0,0 +1,31 @@
process ENSURE_FASTQ_EXTENSION {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
'biocontainers/biocontainers:v1.2.0_cv2' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path('*.fastq.gz'), emit: reads
script:
if (meta.single_end) {
fastq = "${reads.baseName}.fastq.gz"
"""
ln -s '${reads}' '${fastq}'
"""
} else {
first = "${reads[0].baseName}.fastq.gz"
second = "${reads[1].baseName}.fastq.gz"
"""
ln -s '${reads[0]}' '${first}'
ln -s '${reads[1]}' '${second}'
"""
}
}
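This local module exists because AdapterRemoval names its output *.truncated.gz / *.collapsed.gz while downstream modules glob for *.fastq.gz; the symlink restores the expected extension. A simplified call, mirroring its use in the SHORTREAD_ADAPTERREMOVAL subworkflow further down:

ENSURE_FASTQ_EXTENSION ( ADAPTERREMOVAL_PAIRED.out.collapsed )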

View file

@@ -0,0 +1,70 @@
process ADAPTERREMOVAL {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
input:
tuple val(meta), path(reads)
path(adapterlist)
output:
tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated
tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded
tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated
tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated
tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed
tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved
tuple val(meta), path('*.log') , emit: log
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
AdapterRemoval \\
--file1 $reads \\
$args \\
$list \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
} else {
"""
AdapterRemoval \\
--file1 ${reads[0]} \\
--file2 ${reads[1]} \\
$args \\
$list \\
--basename ${prefix} \\
--threads $task.cpus \\
--settings ${prefix}.log \\
--seed 42 \\
--gzip
cat <<-END_VERSIONS > versions.yml
"${task.process}":
adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
END_VERSIONS
"""
}
}

View file

@@ -0,0 +1,90 @@
name: adapterremoval
description: Trim sequencing adapters and collapse overlapping reads
keywords:
- trimming
- adapters
- merging
- fastq
tools:
- adapterremoval:
description: The AdapterRemoval v2 tool for merging and clipping reads.
homepage: https://github.com/MikkelSchubert/adapterremoval
documentation: https://adapterremoval.readthedocs.io
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- adapterlist:
type: file
description: Optional text file containing a list of adapters to search for
and remove, one adapter per line. Otherwise the default adapters are used
(see the AdapterRemoval man page); user-specified adapters can also be
supplied via ext.args.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- singles_truncated:
type: file
description: |
Adapter trimmed FastQ files of either single-end reads, or singleton
'orphaned' reads from merging of paired-end data (i.e., one of the pair
was lost due to filtering thresholds).
pattern: "*.truncated.gz"
- discarded:
type: file
description: |
Adapter trimmed FastQ files of reads that did not pass filtering
thresholds.
pattern: "*.discarded.gz"
- pair1_truncated:
type: file
description: |
Adapter trimmed R1 FastQ files of paired-end reads that did not merge
with their respective R2 pair due to long templates. The respective pair
is stored in 'pair2_truncated'.
pattern: "*.pair1.truncated.gz"
- pair2_truncated:
type: file
description: |
Adapter trimmed R2 FastQ files of paired-end reads that did not merge
with their respective R1 pair due to long templates. The respective pair
is stored in 'pair1_truncated'.
pattern: "*.pair2.truncated.gz"
- collapsed:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair but were not trimmed.
pattern: "*.collapsed.gz"
- collapsed_truncated:
type: file
description: |
Collapsed FastQ of paired-end reads that successfully merged with their
respective R1 pair and were trimmed of adapter due to sufficient overlap.
pattern: "*.collapsed.truncated.gz"
- log:
type: file
description: AdapterRemoval log file
pattern: "*.log"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@maxibor"
- "@jfy133"

View file

@@ -10,6 +10,7 @@ process CENTRIFUGE {
input:
tuple val(meta), path(reads)
path db
val db_name
val save_unaligned
val save_aligned
val sam_format
@@ -42,9 +43,8 @@ process CENTRIFUGE {
}
def sam_output = sam_format ? "--out-fmt 'sam'" : ''
"""
- tar -xf $db
centrifuge \\
- -x $db_name \\
-x ${db}/${db_name} \\
-p $task.cpus \\
$paired \\
--report-file ${prefix}.report.txt \\

View file

@@ -27,6 +27,9 @@ input:
type: directory
description: Centrifuge database in .tar.gz format
pattern: "*.tar.gz"
- db_name:
type: string
description: Centrifuge database filenames without the suffix ".cf"
- save_unaligned:
type: value
description: If true unmapped fastq files are saved

View file

@@ -0,0 +1,45 @@
process METAPHLAN3 {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"
input:
tuple val(meta), path(input)
path metaphlan_db
output:
tuple val(meta), path("*_profile.txt") , emit: profile
tuple val(meta), path("*.biom") , emit: biom
tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
"""
metaphlan \\
--nproc $task.cpus \\
$input_type \\
$input_data \\
$args \\
$bowtie2_out \\
--bowtie2db ${metaphlan_db} \\
--biom ${prefix}.biom \\
--output_file ${prefix}_profile.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
END_VERSIONS
"""
}
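For a single-end gzipped FASTQ, the ternaries above should resolve the call to something like the following (prefix and database path hypothetical; --nproc follows task.cpus):

metaphlan \\
    --nproc 4 \\
    --input_type fastq \\
    sample.fastq.gz \\
    --bowtie2out sample.bowtie2out.txt \\
    --bowtie2db metaphlan_db \\
    --biom sample.biom \\
    --output_file sample_profile.txt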

View file

@@ -0,0 +1,52 @@
name: metaphlan3
description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data.
keywords:
- metagenomics
- classification
- fastq
- bam
- fasta
tools:
- metaphlan3:
description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
homepage: https://huttenhower.sph.harvard.edu/metaphlan/
documentation: https://github.com/biobakery/MetaPhlAn
doi: "10.7554/eLife.65088"
licence: ["MIT License"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: MetaPhlAn 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPhlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- profile:
type: file
description: Tab-separated output file of the predicted taxon relative abundances
pattern: "*.{txt}"
- biom:
type: file
description: General-use format for representing biological sample by observation contingency tables
pattern: "*.{biom}"
- bowtie2out:
type: file
description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPhlAn marker database (not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3)
pattern: "*.{bowtie2out.txt}"
authors:
- "@MGordon09"

View file

@@ -55,9 +55,15 @@ params {
databases = null
// FASTQ preprocessing
shortread_clipmerge = false
- shortread_excludeunmerged = true
- longread_clip = false
shortread_clipmerge_tool = 'fastp'
shortread_clipmerge_skipadaptertrim = false
shortread_clipmerge_mergepairs = false
shortread_clipmerge_excludeunmerged = false
shortread_clipmerge_adapter1 = null
shortread_clipmerge_adapter2 = null
shortread_clipmerge_minlength = 15
longread_clip = false
// MALT
run_malt = false
@@ -68,9 +74,12 @@ params {
// centrifuge
run_centrifuge = false
centrifuge_db_name = false
centrifuge_save_unaligned = false
centrifuge_save_aligned = false
centrifuge_sam_format = false
// metaphlan3
run_metaphlan3 = false
}
// Load base.config by default for all pipelines
@@ -155,7 +164,7 @@ if (!params.igenomes_ignore) {
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
env {
- PYTHONNOUSERSITE = 1
PYTHONNOUSERSITE = '1'
R_PROFILE_USER = "/.Rprofile"
R_ENVIRON_USER = "/.Renviron"
JULIA_DEPOT_PATH = "/usr/local/share/julia"
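Putting the new parameters together, a run exercising the AdapterRemoval path and MetaPhlAn3 might look like this (sheet and path names hypothetical):

nextflow run nf-core/taxprofiler -profile docker \\
    --input samplesheet.csv --databases database.csv --outdir ./results \\
    --shortread_clipmerge --shortread_clipmerge_tool adapterremoval \\
    --shortread_clipmerge_mergepairs --run_metaphlan3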

View file

@@ -10,7 +10,10 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
- "required": ["input", "outdir"],
"required": [
"input",
"outdir"
],
"properties": {
"input": {
"type": "string",
@@ -173,7 +176,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
- "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
@@ -265,9 +275,9 @@
"shortread_clipmerge": {
"type": "boolean"
},
- "shortread_excludeunmerged": {
"shortread_clipmerge_excludeunmerged": {
"type": "boolean",
- "default": true
"default": false
},
"longread_clip": {
"type": "boolean"
@@ -293,6 +303,40 @@
},
"centrifuge_sam_format": {
"type": "boolean"
},
"run_metaphlan3": {
"type": "boolean",
"description": "Enable MetaPhlAn for taxonomic profiling"
},
"shortread_clipmerge_tool": {
"type": "string",
"default": "fastp",
"enum": [
"fastp",
"adapterremoval"
]
},
"shortread_clipmerge_skipadaptertrim": {
"type": "boolean"
},
"shortread_clipmerge_mergepairs": {
"type": "boolean"
},
"shortread_clipmerge_adapter1": {
"type": "string",
"default": "None"
},
"shortread_clipmerge_adapter2": {
"type": "string",
"default": "None"
},
"shortread_clipmerge_minlength": {
"type": "integer",
"default": 15
},
"centrifuge_db_name": {
"type": "string",
"default": "false"
}
}
}

View file

@@ -12,16 +12,17 @@ workflow DB_CHECK {
main:
// TODO: make database sheet check
// Checks:
// 1) no duplicates,
// 2) args do not have quotes, e.g. just `,,` and NOT `,"",`
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
.csv
.splitCsv ( header:true, sep:',' )
- .dump(tag: "db_split_csv_out")
.map { create_db_channels(it) }
- .dump(tag: "db_channel_prepped")
ch_dbs_for_untar = parsed_samplesheet
.branch {
- untar: it[1].toString().endsWith(".tar.gz") && it[0]['tool'] != "centrifuge"
untar: it[1].toString().endsWith(".tar.gz")
skip: true
}
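For orientation, the database sheet parsed here is a CSV carrying a tool name, database name, tool arguments and database path per row, matching the meta fields (tool, db_name, db_params) seen downstream. A hypothetical sketch (header names assumed, paths invented):

tool,db_name,db_params,db_path
malt,mal95,"-id 90",/path/to/databases/malt90/
kraken2,db1,,/path/to/databases/kraken2_db.tar.gz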

View file

@@ -12,7 +12,6 @@ workflow INPUT_CHECK {
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
.csv
.splitCsv ( header:true, sep:',' )
- .dump(tag: "input_split_csv_out")
.branch {
fasta: it['fasta'] != ''
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
@@ -21,23 +20,20 @@ workflow INPUT_CHECK {
parsed_samplesheet.fastq
.map { create_fastq_channel(it) }
- .dump(tag: "fastq_channel_init")
.set { fastq }
parsed_samplesheet.nanopore
.map { create_fastq_channel(it) }
- .dump(tag: "fastq_nanopore_channel_init")
.set { nanopore }
parsed_samplesheet.fasta
.map { create_fasta_channel(it) }
- .dump(tag: "fasta_channel_init")
.set { fasta }
emit:
- fastq // channel: [ val(meta), [ reads ] ]
fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ]
- nanopore // channel: [ val(meta), [ reads ] ]
nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ]
- fasta // channel: [ val(meta), fasta ]
fasta = fasta ?: [] // channel: [ val(meta), fasta ]
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
@@ -72,9 +68,7 @@ def create_fastq_channel(LinkedHashMap row) {
}
return fastq_meta
- }
}// Function to get list of [ meta, fasta ]
- // Function to get list of [ meta, fasta ]
def create_fasta_channel(LinkedHashMap row) {
def meta = [:]
meta.id = row.sample

View file

@@ -1,6 +1,9 @@
/*
Process long raw reads with porechop
*/
- include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
workflow LONGREAD_PREPROCESSING {
take:
@@ -13,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
PORECHOP ( reads )
ch_processed_reads = PORECHOP.out.reads
- .dump(tag: "pre_fastqc_check")
.map {
meta, reads ->
def meta_new = meta.clone()
@@ -21,9 +23,9 @@ workflow LONGREAD_PREPROCESSING {
[ meta_new, reads ]
}
- FASTQC_POST ( PORECHOP.out.reads )
FASTQC_PROCESSED ( PORECHOP.out.reads )
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
- ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
emit:

View file

@@ -0,0 +1,129 @@
/*
Process short raw reads with AdapterRemoval
*/
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
include {
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
} from '../../modules/local/ensure_fastq_extension'
workflow SHORTREAD_ADAPTERREMOVAL {
take:
reads // [[meta], [reads]]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_input_for_adapterremoval = reads
.branch{
single: it[0].single_end
paired: !it[0].single_end
}
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
/*
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file
* has to be exported in a separate channel and we must manually recombine when necessary.
*/
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ENSURE_FASTQ_EXTENSION1(
Channel.empty().mix(
ADAPTERREMOVAL_PAIRED.out.collapsed,
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
)
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
CAT_FASTQ(
ENSURE_FASTQ_EXTENSION1.out.reads
.groupTuple()
)
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
} else {
ENSURE_FASTQ_EXTENSION1(
ADAPTERREMOVAL_PAIRED.out.pair1_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION2(
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
.map { meta, reads ->
meta.single_end = true
[meta, reads]
}
)
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
.groupTuple()
.map { meta, pair1, pair2 ->
meta.single_end = false
[ meta, [ pair1, pair2 ].flatten() ]
}
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
}
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix(
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
)
emit:
reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@@ -0,0 +1,55 @@
/*
Process short raw reads with FastP
*/
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
workflow SHORTREAD_FASTP {
take:
reads // [[meta], [reads]]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_input_for_fastp = reads
.branch{
single: it[0]['single_end'] == true
paired: it[0]['single_end'] == false
}
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
// Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
if ( params.shortread_clipmerge_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
.map {
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads )
} else {
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
.mix( FASTP_SINGLE.out.reads )
}
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
ch_processed_reads = ch_fastp_reads_prepped
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@@ -3,67 +3,33 @@
//
- include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
include { SHORTREAD_FASTP } from './shortread_fastp'
- include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
- include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
workflow SHORTREAD_PREPROCESSING {
take:
reads // file: /path/to/samplesheet.csv
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
- //
- // STEP: Read clipping and merging
- //
- // TODO give option to clip only and retain pairs
- // TODO give option to retain singletons (probably fastp option likely)
- // TODO move to subworkflow
- if ( params.shortread_clipmerge ) {
- ch_input_for_fastp = reads
- .dump(tag: "pre-fastp_branch")
- .branch{
- single: it[0]['single_end'] == true
- paired: it[0]['single_end'] == false
- }
- ch_input_for_fastp.single.dump(tag: "input_fastp_single")
- ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
- FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
- FASTP_PAIRED ( ch_input_for_fastp.paired, false, true )
- ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
- .mix( FASTP_SINGLE.out.reads )
- .map {
- meta, reads ->
- def meta_new = meta.clone()
- meta_new['single_end'] = 1
- [ meta_new, reads ]
- }
- FASTQC_POST ( ch_fastp_reads_prepped )
- ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
- ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
- ch_processed_reads = ch_fastp_reads_prepped
- ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
- ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
- ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
- ch_multiqc_files.dump(tag: "preprocessing_mqc_final")
if ( params.shortread_clipmerge_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else {
ch_processed_reads = reads
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@@ -17,6 +17,8 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
// Check mandatory parameters
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging is not turned on. Please specify --shortread_clipmerge_mergepairs"
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -36,11 +38,11 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { DB_CHECK } from '../subworkflows/local/db_check'
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -59,6 +61,7 @@ include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fas
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE } from '../modules/nf-core/modules/centrifuge/main'
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -73,9 +76,9 @@ workflow TAXPROFILER {
ch_versions = Channel.empty()
- //
- // SUBWORKFLOW: Read in samplesheet, validate and stage input files
- //
/*
SUBWORKFLOW: Read in samplesheet, validate and stage input files
*/
INPUT_CHECK (
ch_input
)
@@ -85,22 +88,24 @@ workflow TAXPROFILER {
ch_databases
)
- //
- // MODULE: Run FastQC
- //
/*
MODULE: Run FastQC
*/
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
FASTQC (
ch_input_for_fastqc
)
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
- //
- // PERFORM PREPROCESSING
- //
/*
SUBWORKFLOW: PERFORM PREPROCESSING
*/
if ( params.shortread_clipmerge ) {
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
} else {
@@ -115,54 +120,31 @@
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
}
- //
- // PERFORM SHORT READ RUN MERGING
- // TODO: Check not necessary for long reads too?
- //
- ch_processed_for_combine = ch_shortreads_preprocessed
- .dump(tag: "prep_for_combine_grouping")
- .map {
- meta, reads ->
- def meta_new = meta.clone()
- meta_new['run_accession'] = 'combined'
- [ meta_new, reads ]
- }
- .groupTuple ( by: 0 )
- .branch{
- combine: it[1].size() >= 2
- skip: it[1].size() < 2
- }
- CAT_FASTQ ( ch_processed_for_combine.combine )
- ch_reads_for_profiling = ch_processed_for_combine.skip
- .dump(tag: "skip_combine")
- .mix( CAT_FASTQ.out.reads )
- .dump(tag: "files_for_profiling")
- //
- // COMBINE READS WITH POSSIBLE DATABASES
- //
/*
COMBINE READS WITH POSSIBLE DATABASES
*/
// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
- ch_input_for_profiling = ch_reads_for_profiling
ch_input_for_profiling = ch_shortreads_preprocessed
.mix( ch_longreads_preprocessed )
.combine(DB_CHECK.out.dbs)
- .dump(tag: "reads_plus_db")
.branch {
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2'
metaphlan3: it[2]['tool'] == 'metaphlan3'
centrifuge: it[2]['tool'] == 'centrifuge'
unknown: true
}
- //
- // PREPARE PROFILER INPUT CHANNELS
- //
/*
PREPARE PROFILER INPUT CHANNELS
*/
// We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.map {
it ->
def temp_meta = [ id: it[2]['db_name']] + it[2]
@@ -170,7 +152,6 @@ workflow TAXPROFILER {
[ temp_meta, it[1], db ]
}
.groupTuple(by: [0,2])
- .dump(tag: "input for malt")
.multiMap {
it ->
reads: [ it[0], it[1].flatten() ]
@@ -179,7 +160,6 @@
// We can run Kraken2 one-by-one sample-wise
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
- .dump(tag: "input for kraken")
.multiMap {
it ->
reads: [ it[0] + it[2], it[1] ]
@@ -198,6 +178,17 @@
//
// RUN PROFILING
//
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
.dump(tag: "input_metaphlan3")
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
/*
MODULE: RUN PROFILING
*/
if ( params.run_malt ) {
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
}
@@ -207,12 +198,16 @@
}
if ( params.run_centrifuge ) {
- CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_db_name, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
}
- //
- // MODULE: MultiQC
- //
if ( params.run_metaphlan3 ) {
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
}
/*
MODULE: MultiQC
*/
workflow_summary = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
ch_workflow_summary = Channel.value(workflow_summary)
@@ -240,6 +235,7 @@
// TODO MALT results overwriting per database?
// TODO Versions for Kraken/MALT not reported?
// TODO create multiQC module for metaphlan
MULTIQC (
ch_multiqc_files.collect()
)