mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-10 22:03:09 +00:00
Merge branch 'dev' into update_taxpasta_version
This commit is contained in:
commit
0007c99d1c
30 changed files with 1046 additions and 766 deletions
1
.github/workflows/awsfulltest.yml
vendored
1
.github/workflows/awsfulltest.yml
vendored
|
@ -15,7 +15,6 @@ jobs:
|
|||
steps:
|
||||
- name: Launch workflow via tower
|
||||
uses: nf-core/tower-action@v3
|
||||
# TODO nf-core: You can customise AWS full pipeline tests as required
|
||||
# Add full size test data (but still relatively small datasets for few samples)
|
||||
# on the `test_full.config` test runs with only one set of parameters
|
||||
with:
|
||||
|
|
9
.github/workflows/ci.yml
vendored
9
.github/workflows/ci.yml
vendored
|
@ -29,19 +29,10 @@ jobs:
|
|||
- "latest-everything"
|
||||
parameters:
|
||||
- "--preprocessing_qc_tool falco"
|
||||
- "--perform_longread_qc false"
|
||||
- "--perform_shortread_qc false"
|
||||
- "--shortread_qc_tool fastp"
|
||||
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_includeunmerged"
|
||||
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
|
||||
- "--shortread_qc_tool adapterremoval"
|
||||
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_includeunmerged"
|
||||
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
|
||||
- "--shortread_complexityfilter_tool bbduk"
|
||||
- "--shortread_complexityfilter_tool prinseqplusplus"
|
||||
- "--perform_runmerging"
|
||||
- "--perform_runmerging --shortread_qc_mergepairs"
|
||||
- "--shortread_complexityfilter false --perform_shortread_hostremoval"
|
||||
|
||||
steps:
|
||||
- name: Check out pipeline code
|
||||
|
|
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -3,10 +3,18 @@
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## v1.0dev - [date]
|
||||
## v1.0.0 - Dodgy Dachshund [date]
|
||||
|
||||
Initial release of nf-core/taxprofiler, created with the [nf-core](https://nf-co.re/) template.
|
||||
|
||||
- Add read quality control (sequencing QC, adapter removal and merging)
|
||||
- Add read complexity filtering
|
||||
- Add host-reads removal step
|
||||
- Add run merging
|
||||
- Add taxonomic classification
|
||||
- Add taxon table standardisation
|
||||
- Add post-classification visualisation
|
||||
|
||||
### `Added`
|
||||
|
||||
### `Fixed`
|
||||
|
|
46
CITATIONS.md
46
CITATIONS.md
|
@ -16,6 +16,10 @@
|
|||
|
||||
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
|
||||
|
||||
- [falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||
|
||||
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||
|
||||
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
|
||||
|
||||
> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
|
||||
|
@ -26,12 +30,30 @@
|
|||
|
||||
- [Porechop](https://github.com/rrwick/Porechop)
|
||||
|
||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||
|
||||
- [BBTools](http://sourceforge.net/projects/bbmap/)
|
||||
|
||||
- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
|
||||
|
||||
> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
|
||||
|
||||
- [Bowtie2](https://doi.org/10.1038/nmeth.1923)
|
||||
|
||||
> Langmead, B., & Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. Nature Methods, 9(4), 357–359. doi: 10.1038/nmeth.1923
|
||||
|
||||
- [minimap2](https://doi.org/10.1093/bioinformatics/bty191)
|
||||
|
||||
> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics , 34(18), 3094–3100. doi: 10.1093/bioinformatics/bty191
|
||||
|
||||
- [SAMTools](https://doi.org/10.1093/gigascience/giab008)
|
||||
|
||||
> Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2). doi: 10.1093/gigascience/giab008
|
||||
|
||||
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
|
||||
|
||||
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
|
||||
|
||||
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
|
||||
|
||||
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
||||
|
@ -40,13 +62,9 @@
|
|||
|
||||
> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
|
||||
|
||||
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
|
||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||
|
||||
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
|
||||
|
||||
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
|
||||
|
||||
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
|
||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
|
||||
|
||||
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
|
||||
|
||||
|
@ -56,23 +74,25 @@
|
|||
|
||||
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
|
||||
|
||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
|
||||
|
||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
|
||||
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
|
||||
|
||||
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
|
||||
|
||||
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
|
||||
|
||||
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
|
||||
- [Kaiju](https://doi.org/10.1038/ncomms11257)
|
||||
|
||||
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
|
||||
> Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. doi: 10.1038/ncomms11257
|
||||
|
||||
- [FILTLONG](https://github.com/rrwick/Filtlong)
|
||||
- [mOTUs](https://doi.org/10.1186/s40168-022-01410-z)
|
||||
|
||||
- [falco](https://doi.org/10.12688/f1000research.21142.2)
|
||||
> Ruscheweyh, H.-J., Milanese, A., Paoli, L., Karcher, N., Clayssen, Q., Keller, M. I., Wirbel, J., Bork, P., Mende, D. R., Zeller, G., & Sunagawa, S. (2022). Cultivation-independent genomes greatly expand taxonomic-profiling capabilities of mOTUs across various environments. Microbiome, 10(1), 212. doi: 10.1186/s40168-022-01410-z
|
||||
|
||||
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
|
||||
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
|
||||
|
||||
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
|
|
2
LICENSE
2
LICENSE
|
@ -1,6 +1,6 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) nf-core community
|
||||
Copyright (c) James A. Fellows Yates, Sofia Stamouli, Moritz E. Beber, Lauri Mesilaakso, Thomas A. Christensen II, Jianhong Ou, Mahwash Jamy, Maxime Borry, Rafal Stepien, Tanja Normark
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
23
README.md
23
README.md
|
@ -12,20 +12,16 @@
|
|||
|
||||
## Introduction
|
||||
|
||||
> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
|
||||
|
||||
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic classification and profiling of shotgun metagenomic data. It allows for in-parallel taxonomic identification of reads or taxonomic abundance estimation with multiple classification and profiling tools against multiple databases, and produces standardised output tables.
|
||||
|
||||
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
|
||||
|
||||
On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/taxprofiler/results).
|
||||
|
||||
The nf-core/taxprofiler CI test dataset uses sequencing data from [Maixer et al. (2021) Curr. Bio.](https://doi.org/10.1016/j.cub.2021.09.031). The AWS full test dataset uses sequencing data and reference genomes from [Meslier (2022) _Sci. Data_](https://doi.org/10.1038/s41597-022-01762-z)
|
||||
The nf-core/taxprofiler CI test dataset uses sequencing data from [Maixner et al. (2021) Curr. Bio.](https://doi.org/10.1016/j.cub.2021.09.031). The AWS full test dataset uses sequencing data and reference genomes from [Meslier (2022) _Sci. Data_](https://doi.org/10.1038/s41597-022-01762-z)
|
||||
|
||||
## Pipeline summary
|
||||
|
||||
<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
|
||||
|
||||
![](docs/images/taxprofiler_tube.png)
|
||||
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
|
||||
|
@ -46,7 +42,7 @@ The nf-core/taxprofiler CI test dataset uses sequencing data from [Maixer et al.
|
|||
- [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
|
||||
5. Perform optional post-processing with:
|
||||
- [bracken](https://ccb.jhu.edu/software/bracken/)
|
||||
6. Standardises output tables
|
||||
6. Standardises output tables ([`Taxpasta`](https://taxpasta.readthedocs.io))
|
||||
7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
|
||||
8. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
|
||||
|
||||
|
@ -81,11 +77,18 @@ The nf-core/taxprofiler pipeline comes with documentation about the pipeline [us
|
|||
|
||||
## Credits
|
||||
|
||||
nf-core/taxprofiler was originally written by nf-core community.
|
||||
nf-core/taxprofiler was originally written by [James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), and [Sofia Stamouli](https://github.com/sofsam).
|
||||
|
||||
We thank the following people for their extensive assistance in the development of this pipeline:
|
||||
We thank the following people for their contributions to the development of this pipeline:
|
||||
|
||||
[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofsam), [Maxime Borry](https://github.com/maxibor),[Thomas A. Christensen II](https://github.com/MillironX), [Jianhong Ou](https://github.com/jianhong), [Rafal Stepien](https://github.com/rafalstepien), [Mahwash Jamy](https://github.com/mjamy).
|
||||
[Lauri Mesilaakso](https://github.com/ljmesi), [Tanja Normark](https://github.com/talnor), [Maxime Borry](https://github.com/maxibor), [Thomas A. Christensen II](https://github.com/MillironX), [Jianhong Ou](https://github.com/jianhong), [Rafal Stepien](https://github.com/rafalstepien), [Mahwash Jamy](https://github.com/mjamy), and the [nf-core/community](https://nf-co.re/community).
|
||||
|
||||
We also are grateful for the feedback and comments from:
|
||||
|
||||
- [Alex Hübner](https://github.com/alexhbnr)
|
||||
- [LilyAnderssonLee](https://github.com/LilyAnderssonLee)
|
||||
|
||||
Credit and thanks also go to [Zandra Fagernäs](https://github.com/ZandraFagernas) for the logo.
|
||||
|
||||
## Contributions and Support
|
||||
|
||||
|
@ -98,8 +101,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
|
|||
<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
|
||||
<!-- If you use nf-core/taxprofiler for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
|
||||
|
||||
<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
|
||||
|
||||
An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
|
||||
|
||||
You can cite the `nf-core` publication as follows:
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
|
||||
<meta name="description" content="nf-core/taxprofiler: Taxonomic profiling of shotgun metagenomic data">
|
||||
<meta name="description" content="nf-core/taxprofiler: Taxonomic classification and profiling of shotgun metagenomic data">
|
||||
<title>nf-core/taxprofiler Pipeline Report</title>
|
||||
</head>
|
||||
<body>
|
||||
|
|
|
@ -39,33 +39,30 @@ sp:
|
|||
diamond:
|
||||
contents: "diamond v"
|
||||
num_lines: 10
|
||||
|
||||
#extra_fn_clean_exts:
|
||||
# - '_fastp'
|
||||
# - '.pe.settings'
|
||||
# - '.se.settings'
|
||||
fastqc/data:
|
||||
fn_re: ".*(fastqc|falco)_data.txt$"
|
||||
fastqc/zip:
|
||||
fn: "*_fastqc.zip"
|
||||
|
||||
top_modules:
|
||||
- "fastqc":
|
||||
name: "FastQC (pre-Trimming)"
|
||||
name: "FastQC / Falco (pre-Trimming)"
|
||||
path_filters:
|
||||
- "*raw_*fastqc.zip"
|
||||
- "*raw*"
|
||||
path_filters_exclude:
|
||||
- "*processed*"
|
||||
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
|
||||
- "fastqc":
|
||||
name: "Falco (pre-Trimming)"
|
||||
name: "FastQC / Falco (post-Trimming)"
|
||||
path_filters:
|
||||
- "*_raw_falco_*_report.html"
|
||||
- "*processed*"
|
||||
path_filters_exclude:
|
||||
- "*raw*"
|
||||
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
|
||||
- "fastp"
|
||||
- "adapterRemoval"
|
||||
- "porechop":
|
||||
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
|
||||
- "fastqc":
|
||||
name: "FastQC (post-Trimming)"
|
||||
path_filters:
|
||||
- "*_processed_*fastqc.zip"
|
||||
- "fastqc":
|
||||
name: "Falco (post-Trimming)"
|
||||
path_filters:
|
||||
- "*_processed_falco_*_report.html"
|
||||
- "bbduk"
|
||||
- "prinseqplusplus"
|
||||
- "filtlong"
|
||||
|
@ -105,19 +102,20 @@ top_modules:
|
|||
#It is not possible to set placement for custom kraken and centrifuge columns.
|
||||
|
||||
table_columns_placement:
|
||||
FastQC (pre-Trimming):
|
||||
FastQC / Falco (pre-Trimming):
|
||||
total_sequences: 100
|
||||
avg_sequence_length: 110
|
||||
median_sequence_length: 120
|
||||
percent_duplicates: 130
|
||||
percent_gc: 140
|
||||
percent_fails: 150
|
||||
Falco (pre-Trimming):
|
||||
FastQC / Falco (post-Trimming):
|
||||
total_sequences: 200
|
||||
avg_sequence_length: 210
|
||||
percent_duplicates: 220
|
||||
percent_gc: 230
|
||||
percent_fails: 240
|
||||
median_sequence_length: 220
|
||||
percent_duplicates: 230
|
||||
percent_gc: 240
|
||||
percent_fails: 250
|
||||
fastp:
|
||||
pct_adapter: 300
|
||||
pct_surviving: 310
|
||||
|
@ -141,19 +139,6 @@ table_columns_placement:
|
|||
Middle Split Percent: 460
|
||||
Filtlong:
|
||||
Target bases: 500
|
||||
FastQC (post-Trimming):
|
||||
total_sequences: 600
|
||||
avg_sequence_length: 610
|
||||
median_sequence_length: 620
|
||||
percent_duplicates: 630
|
||||
percent_gc: 640
|
||||
percent_fails: 650
|
||||
Falco (post-Trimming):
|
||||
total_sequences: 700
|
||||
avg_sequence_length: 710
|
||||
percent_duplicates: 720
|
||||
percent_gc: 730
|
||||
percent_fails: 740
|
||||
BBDuk:
|
||||
Input reads: 800
|
||||
Total Removed bases percent: 810
|
||||
|
@ -205,25 +190,18 @@ table_columns_placement:
|
|||
"Number of ext-mOTUs": 1880
|
||||
|
||||
table_columns_visible:
|
||||
FastQC (pre-Trimming):
|
||||
FastQC / Falco (pre-Trimming):
|
||||
total_sequences: True
|
||||
avg_sequence_length: True
|
||||
percent_duplicates: True
|
||||
percent_gc: True
|
||||
percent_fails: False
|
||||
Falco (pre-Trimming):
|
||||
FastQC / Falco (post-Trimming):
|
||||
total_sequences: True
|
||||
avg_sequence_length: True
|
||||
percent_duplicates: True
|
||||
percent_gc: True
|
||||
percent_duplicates: False
|
||||
percent_gc: False
|
||||
percent_fails: False
|
||||
fastp:
|
||||
pct_adapter: True
|
||||
pct_surviving: True
|
||||
pct_duplication: False
|
||||
after_filtering_gc_content: False
|
||||
after_filtering_q30_rate: False
|
||||
after_filtering_q30_bases: False
|
||||
porechop:
|
||||
Input reads: False
|
||||
Start Trimmed:
|
||||
|
@ -232,6 +210,13 @@ table_columns_visible:
|
|||
End Trimmed Percent: True
|
||||
Middle Split: False
|
||||
Middle Split Percent: True
|
||||
fastp:
|
||||
pct_adapter: True
|
||||
pct_surviving: True
|
||||
pct_duplication: False
|
||||
after_filtering_gc_content: False
|
||||
after_filtering_q30_rate: False
|
||||
after_filtering_q30_bases: False
|
||||
Filtlong:
|
||||
Target bases: True
|
||||
Adapter Removal:
|
||||
|
@ -239,18 +224,6 @@ table_columns_visible:
|
|||
percent_aligned: True
|
||||
percent_collapsed: True
|
||||
percent_discarded: False
|
||||
FastQC (post-Trimming):
|
||||
total_sequences: True
|
||||
avg_sequence_length: True
|
||||
percent_duplicates: False
|
||||
percent_gc: False
|
||||
percent_fails: False
|
||||
Falco (post-Trimming):
|
||||
total_sequences: True
|
||||
avg_sequence_length: True
|
||||
percent_duplicates: False
|
||||
percent_gc: False
|
||||
percent_fails: False
|
||||
BBDuk:
|
||||
Input reads: False
|
||||
Total Removed bases Percent: False
|
||||
|
@ -278,25 +251,13 @@ table_columns_visible:
|
|||
motus: False
|
||||
|
||||
table_columns_name:
|
||||
FastQC (pre-Trimming):
|
||||
FastQC / Falco (pre-Trimming):
|
||||
total_sequences: "Nr. Input Reads"
|
||||
avg_sequence_length: "Length Input Reads"
|
||||
percent_gc: "% GC Input Reads"
|
||||
percent_duplicates: "% Dups Input Reads"
|
||||
percent_fails: "% Failed Input Reads"
|
||||
Falco (pre-Trimming):
|
||||
total_sequences: "Nr. Input Reads"
|
||||
avg_sequence_length: "Length Input Reads"
|
||||
percent_gc: "% GC Input Reads"
|
||||
percent_duplicates: "% Dups Input Reads"
|
||||
percent_fails: "% Failed Input Reads"
|
||||
FastQC (post-Trimming):
|
||||
total_sequences: "Nr. Processed Reads"
|
||||
avg_sequence_length: "Length Processed Reads"
|
||||
percent_gc: "% GC Processed Reads"
|
||||
percent_duplicates: "% Dups Processed Reads"
|
||||
percent_fails: "% Failed Processed Reads"
|
||||
Falco (post-Trimming):
|
||||
FastQC / Falco (post-Trimming):
|
||||
total_sequences: "Nr. Processed Reads"
|
||||
avg_sequence_length: "Length Processed Reads"
|
||||
percent_gc: "% GC Processed Reads"
|
||||
|
@ -314,7 +275,8 @@ extra_fn_clean_exts:
|
|||
- ".bbduk"
|
||||
- ".unmapped"
|
||||
- "_filtered"
|
||||
- "_processed"
|
||||
- type: remove
|
||||
pattern: "_falco"
|
||||
|
||||
section_comments:
|
||||
general_stats: "By default, all read count columns are displayed as millions (M) of reads."
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
|
||||
process {
|
||||
|
||||
// TODO nf-core: Check the defaults for all processes
|
||||
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
|
||||
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
|
||||
time = { check_max( 4.h * task.attempt, 'time' ) }
|
||||
|
@ -24,7 +23,6 @@ process {
|
|||
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
|
||||
// If possible, it would be nice to keep the same label naming convention when
|
||||
// adding in your local modules too.
|
||||
// TODO nf-core: Customise requirements for specific processes.
|
||||
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
|
||||
withLabel:process_single {
|
||||
cpus = { check_max( 1 , 'cpus' ) }
|
||||
|
@ -62,6 +60,19 @@ process {
|
|||
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
cache = false
|
||||
}
|
||||
|
||||
withName: BRACKEN_BRACKEN {
|
||||
errorStrategy = 'ignore'
|
||||
}
|
||||
|
||||
withName: CENTRIFUGE_KREPORT {
|
||||
errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
|
||||
}
|
||||
|
||||
withName: KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE {
|
||||
errorStrategy = { task.exitStatus in [255,1] ? 'ignore' : 'retry' }
|
||||
}
|
||||
|
||||
withName: MEGAN_RMA2INFO_TSV {
|
||||
cpus = { check_max( 1 , 'cpus' ) }
|
||||
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
|
||||
|
|
|
@ -196,8 +196,8 @@ process {
|
|||
publishDir = [
|
||||
path: { "${params.outdir}/bowtie2/build" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_index,
|
||||
pattern: 'bowtie2'
|
||||
pattern: 'bowtie2',
|
||||
enabled: params.save_hostremoval_index
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -213,14 +213,14 @@ process {
|
|||
[
|
||||
path: { "${params.outdir}/bowtie2/align" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_bam,
|
||||
pattern: '*.bam'
|
||||
pattern: '*.bam',
|
||||
enabled: params.save_hostremoval_bam
|
||||
],
|
||||
[
|
||||
path: { "${params.outdir}/bowtie2/align" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_unmapped,
|
||||
pattern: '*.fastq.gz'
|
||||
pattern: '*.fastq.gz',
|
||||
enabled: params.save_hostremoval_unmapped
|
||||
]
|
||||
]
|
||||
}
|
||||
|
@ -230,8 +230,8 @@ process {
|
|||
publishDir = [
|
||||
path: { "${params.outdir}/minimap2/index" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_index,
|
||||
pattern: '*.mmi'
|
||||
pattern: '*.mmi',
|
||||
enabled: params.save_hostremoval_index
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -240,8 +240,8 @@ process {
|
|||
publishDir = [
|
||||
path: { "${params.outdir}/minimap2/align" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_bam,
|
||||
pattern: '*.bam'
|
||||
pattern: '*.bam',
|
||||
enabled: params.save_hostremoval_bam
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -255,8 +255,8 @@ process {
|
|||
publishDir = [
|
||||
path: { "${params.outdir}/samtools/bam2fq" },
|
||||
mode: params.publish_dir_mode,
|
||||
enabled: params.save_hostremoval_unmapped,
|
||||
pattern: '*.fq.gz'
|
||||
pattern: '*.fq.gz',
|
||||
enabled: params.save_hostremoval_unmapped
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -354,7 +354,6 @@ process {
|
|||
}
|
||||
|
||||
withName: BRACKEN_BRACKEN {
|
||||
errorStrategy = 'ignore'
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.bracken" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.bracken" }
|
||||
publishDir = [
|
||||
|
@ -446,17 +445,16 @@ process {
|
|||
}
|
||||
|
||||
withName: CENTRIFUGE_CENTRIFUGE {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.centrifuge" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.centrifuge" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/centrifuge/${meta.db_name}/" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{txt,sam,gz}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.centrifuge" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.centrifuge" }
|
||||
}
|
||||
|
||||
withName: CENTRIFUGE_KREPORT {
|
||||
errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.centrifuge" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.centrifuge" }
|
||||
publishDir = [
|
||||
|
@ -467,7 +465,6 @@ process {
|
|||
}
|
||||
|
||||
withName: KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE {
|
||||
errorStrategy = { task.exitStatus in [255,1] ? 'ignore' : 'retry' }
|
||||
ext.prefix = { "centrifuge_${meta.id}_combined_reports" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/centrifuge/" },
|
||||
|
@ -477,16 +474,16 @@ process {
|
|||
}
|
||||
|
||||
withName: KAIJU_KAIJU {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaiju" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaiju" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kaiju/${meta.db_name}/" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.tsv'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
}
|
||||
|
||||
withName: '.*PROFILING:KAIJU_KAIJU2TABLE' {
|
||||
withName: 'KAIJU_KAIJU2TABLE_SINGLE' {
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kaiju/${meta.db_name}/" },
|
||||
|
@ -495,7 +492,7 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: '.*STANDARDISATION_PROFILES:KAIJU_KAIJU2TABLE' {
|
||||
withName: 'KAIJU_KAIJU2TABLE_COMBINED' {
|
||||
ext.prefix = { "kaiju_${meta.id}_combined_reports" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/kaiju/" },
|
||||
|
|
|
@ -45,10 +45,11 @@ params {
|
|||
run_motus = false
|
||||
run_krona = true
|
||||
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
|
||||
malt_save_reads = true
|
||||
kraken2_save_reads = true
|
||||
centrifuge_save_reads = true
|
||||
diamond_save_reads = true
|
||||
malt_save_reads = false
|
||||
kraken2_save_reads = false
|
||||
centrifuge_save_reads = false
|
||||
diamond_save_reads = false
|
||||
run_profile_standardisation = true
|
||||
}
|
||||
|
||||
process {
|
||||
|
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 690 KiB After Width: | Height: | Size: 714 KiB |
File diff suppressed because it is too large
Load diff
Before Width: | Height: | Size: 269 KiB After Width: | Height: | Size: 289 KiB |
|
@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a
|
|||
|
||||
The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
|
||||
|
||||
<!-- TODO nf-core: Write this documentation describing your workflow's output -->
|
||||
|
||||
## Pipeline overview
|
||||
|
||||
The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
|
||||
|
@ -18,12 +16,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
|||
- [AdapterRemoval](#adapterremoval) - Adapter trimming for Illumina data
|
||||
- [Porechop](#porechop) - Adapter removal for Oxford Nanopore data
|
||||
- [BBDuk](#bbduk) - Quality trimming and filtering for Illumina data
|
||||
- [PRINSEQ++](#prinseq++) - Quality trimming and filtering for Illunina data
|
||||
- [PRINSEQ++](#prinseq) - Quality trimming and filtering for Illumina data
|
||||
- [Filtlong](#filtlong) - Quality trimming and filtering for Nanopore data
|
||||
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
|
||||
- [minimap2](#minimap2) - Host removal for Nanopore reads
|
||||
- [SAMtools stats](#samtoolsstats) - Statistics from host removal
|
||||
- [SAMtools bam2fq](#samtoolsfastq) - Converts unmapped BAM file to fastq format (minimap2 only)
|
||||
- [SAMtools stats](#samtools-stats) - Statistics from host removal
|
||||
- [SAMtools bam2fq](#samtools-bam2fq) - Converts unmapped BAM file to fastq format (minimap2 only)
|
||||
- [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations
|
||||
- [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches
|
||||
- [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species
|
||||
|
@ -37,19 +35,25 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
|||
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
|
||||
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
|
||||
|
||||
### FastQC or falco
|
||||
![](images/taxprofiler_tube.png)
|
||||
|
||||
### FastQC or Falco
|
||||
|
||||
<details markdown="1">
|
||||
<summary>Output files</summary>
|
||||
|
||||
- `fastqc/`
|
||||
- `*_fastqc.html`: FastQC report containing quality metrics.
|
||||
- `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
|
||||
- `{fastqc,falco}/`
|
||||
- {raw,preprocessed}
|
||||
- `*html`: FastQC or Falco report containing quality metrics in HTML format.
|
||||
- `*.txt`: FastQC or Falco report containing quality metrics in TXT format.
|
||||
- `*.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images (FastQC only).
|
||||
|
||||
</details>
|
||||
|
||||
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
|
||||
|
||||
If preprocessing is turned on, nf-core/taxprofiler runs FastQC/Falco twice - once before and once after adapter removal/read merging - to allow evaluation of the performance of these preprocessing steps. Note that in the General Stats table, the columns of these two instances of FastQC/Falco are placed next to each other to make it easier to evaluate. However, the columns of the actual preprocessing steps (i.e., fastp, AdapterRemoval, and Porechop) will be displayed _after_ the two FastQC/Falco columns, even if they were run 'between' the two FastQC/Falco jobs in the pipeline itself.
|
||||
|
||||
> ℹ️ Falco produces identical output to FastQC but in the `falco/` directory.
|
||||
|
||||
![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
|
||||
|
@ -58,8 +62,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
|
|||
|
||||
![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
|
||||
|
||||
> **NB:** The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
|
||||
|
||||
### fastp
|
||||
|
||||
[fastp](https://github.com/OpenGene/fastp) is a FASTQ pre-processing tool for quality control, trimming of adapters, quality filtering and other features.
|
||||
|
@ -188,9 +190,12 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/
|
|||
<summary>Output files</summary>
|
||||
|
||||
- `bowtie2/`
|
||||
- `<sample_id>.bam`: BAM file containing reads that aligned against the user-supplied reference genome as well as unmapped reads
|
||||
- `<sample_id>.bowtie2.log`: log file about the mapped reads
|
||||
- `<sample_id>.unmapped.fastq.gz`: the off-target reads from the mapping that is used in downstream steps.
|
||||
- `build/`
|
||||
- `*.bt2`: Bowtie2 indices of reference genome, only if `--save_hostremoval_index` supplied.
|
||||
- `align/`
|
||||
- `<sample_id>.bam`: BAM file containing reads that aligned against the user-supplied reference genome as well as unmapped reads
|
||||
- `<sample_id>.bowtie2.log`: log file about the mapped reads
|
||||
- `<sample_id>.unmapped.fastq.gz`: the off-target reads from the mapping that is used in downstream steps.
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -212,7 +217,10 @@ It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) or o
|
|||
<summary>Output files</summary>
|
||||
|
||||
- `minimap2`
|
||||
- `<sample_id>.bam`: Alignment file in BAM format containing both mapped and unmapped reads.
|
||||
- `build/`
|
||||
- `*.mmi`: minimap2 indices of reference genome, only if `--save_hostremoval_index` supplied.
|
||||
- `align/`
|
||||
- `<sample_id>.bam`: Alignment file in BAM format containing both mapped and unmapped reads.
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -245,13 +253,31 @@ This directory will be present and contain the unmapped reads from the `.fastq`
|
|||
<details markdown="1">
|
||||
<summary>Output files</summary>
|
||||
|
||||
- `samtoolsstats`
|
||||
- `samtools/stats`
|
||||
- `<sample_id>.stats`: File containing samtools stats output.
|
||||
|
||||
</details>
|
||||
|
||||
In most cases you do not need to check this file, as it is rendered in the MultiQC run report.
|
||||
|
||||
### Run Merging
|
||||
|
||||
nf-core/taxprofiler offers the option to merge FASTQ files of multiple sequencing runs or libraries that derive from the same sample, as specified in the input samplesheet.
|
||||
|
||||
This is the last preprocessing step, so if you have multiple runs or libraries (and run merging turned on), this will represent the final reads that will go into classification/profiling steps.
|
||||
|
||||
<details markdown="1">
|
||||
<summary>Output files</summary>
|
||||
|
||||
- `run_merging/`
|
||||
- `*.fastq.gz`: Concatenated FASTQ files on a per-sample basis
|
||||
|
||||
</details>
|
||||
|
||||
Note that you will only find samples that went through the run merging step in this directory. Samples that had a single run or library will not go through this step of the pipeline and thus will not be present in this directory.
|
||||
|
||||
⚠️ If your pipeline run includes samples with only a single run or library, and you wish to save the final reads that go into classification/profiling, you must also turn on the saving of the reads from the last preprocessing step you have turned on!
|
||||
|
||||
### Bracken
|
||||
|
||||
[Bracken](https://ccb.jhu.edu/software/bracken/) (Bayesian Reestimation of Abundance with Kraken) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample. Bracken uses the taxonomy labels assigned by Kraken, a highly accurate metagenomics classification algorithm, to estimate the number of reads originating from each species present in a sample.
|
||||
|
|
|
@ -17,7 +17,7 @@ Both contain metadata and paths to the data of your input samples and databases.
|
|||
|
||||
When running nf-core/taxprofiler, every step and tool is 'opt in'. To run a given classifier or profiler you must make sure to supply both a database in your `<database>.csv` and supply `--run_<profiler>` flag to your command. Omitting either will result in the profiling tool not executing.
|
||||
|
||||
nf-core/profiler also includes optional pre-processing (adapter clipping, merge running etc.) or post-processing (visualisation) steps. These are also opt in with a `--perform_<step>` flag. In some cases, the pre- and post-processing steps may also require additional files. Please check the parameters tab of this documentation for more information.
|
||||
nf-core/taxprofiler also includes optional pre-processing (adapter clipping, run merging etc.) or post-processing (visualisation) steps. These are also opt in with a `--perform_<step>` flag. In some cases, the pre- and post-processing steps may also require additional files. Please check the parameters tab of this documentation for more information.
|
||||
|
||||
Please see the rest of this page for information about how to prepare input samplesheets and databases and how to run Nextflow pipelines. See the [parameters](https://nf-co.re/taxprofiler/parameters) documentation for more information about specific options the pipeline also offers.
|
||||
|
||||
|
@ -89,7 +89,9 @@ The pipeline takes the paths and specific classification/profiling parameters of
|
|||
|
||||
> ⚠️ To allow user freedom, nf-core/taxprofiler does not check for mandatory or the validity of non-file database parameters for correct execution of the tool - excluding options offered via pipeline level parameters! Please validate your database parameters (cross-referencing [parameters](https://nf-co.re/taxprofiler/parameters), and the given tool documentation) before submitting the database sheet! For example, if you don't use the default read length - Bracken will require `-r <read_length>` in the `db_params` column.
|
||||
|
||||
An example database sheet can look as follows, where 5 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
|
||||
An example database sheet can look as follows, where 7 tools are being used, and `malt` and `kraken2` will be used against two databases each.
|
||||
|
||||
`kraken2` will be run twice even though only having a single 'dedicated' database because specifying `bracken` implies first running `kraken2` on the `bracken` database, as required by `bracken`.
|
||||
|
||||
```console
|
||||
tool,db_name,db_params,db_path
|
||||
|
@ -199,7 +201,7 @@ You can optionally save the FASTQ output of the run merging with the `--save_com
|
|||
|
||||
> ⚠️ For nanopore data: we do not recommend performing any read preprocessing or complexity filtering if you are using ONT's Guppy toolkit for basecalling and post-processing.
|
||||
|
||||
#### Host Removal
|
||||
#### Host-Read Removal
|
||||
|
||||
Removal of possible-host reads from FASTQ files prior to classification/profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
|
||||
|
||||
|
|
|
@ -12,11 +12,6 @@ class WorkflowTaxprofiler {
|
|||
public static void initialise(params, log) {
|
||||
genomeExistsError(params, log)
|
||||
|
||||
// TODO update as necessary
|
||||
//if (!params.fasta) {
|
||||
// log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
|
||||
// System.exit(1)
|
||||
//}
|
||||
}
|
||||
|
||||
//
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
process ENSURE_FASTQ_EXTENSION {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda "conda-forge::bash=5.0
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv2' }"
|
||||
|
||||
|
||||
input:
|
||||
tuple val(meta), path(reads)
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.fastq.gz'), emit: reads
|
||||
|
||||
script:
|
||||
if (meta.single_end) {
|
||||
fastq = "${reads.baseName}.fastq.gz"
|
||||
"""
|
||||
ln -s '${reads}' '${fastq}'
|
||||
"""
|
||||
} else {
|
||||
first = "${reads[0].baseName}.fastq.gz"
|
||||
second = "${reads[1].baseName}.fastq.gz"
|
||||
"""
|
||||
ln -s '${reads[0]}' '${first}'
|
||||
ln -s '${reads[1]}' '${second}'
|
||||
"""
|
||||
}
|
||||
}
|
|
@ -2,10 +2,10 @@ process KRAKEN2_STANDARD_REPORT {
|
|||
tag "$meta.id"
|
||||
label 'process_single'
|
||||
|
||||
conda "conda-forge::sed=4.8"
|
||||
conda "conda-forge::sed=4.7"
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv2' }"
|
||||
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
|
||||
'ubuntu:20.04' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(report)
|
||||
|
|
|
@ -4,8 +4,8 @@ process KRONA_CLEANUP {
|
|||
|
||||
conda "conda-forge::sed=4.7"
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv1' }"
|
||||
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
|
||||
'ubuntu:20.04' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
process SAMPLESHEET_CHECK {
|
||||
tag "$samplesheet"
|
||||
label 'process_single'
|
||||
|
||||
conda "conda-forge::python=3.8.3"
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
|
@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK {
|
|||
path '*.csv' , emit: csv
|
||||
path "versions.yml", emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script: // This script is bundled with the pipeline, in nf-core/taxprofiler/bin/
|
||||
"""
|
||||
check_samplesheet.py \\
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
// Global default params, used in configs
|
||||
params {
|
||||
|
||||
// TODO nf-core: Specify your pipeline's command line flags
|
||||
// Input options
|
||||
input = null
|
||||
|
||||
|
@ -103,7 +102,7 @@ params {
|
|||
shortread_hostremoval_index = null
|
||||
longread_hostremoval_index = null
|
||||
save_hostremoval_index = false
|
||||
save_hostremoval_bam = false
|
||||
save_hostremoval_bam = false
|
||||
save_hostremoval_unmapped = false
|
||||
|
||||
|
||||
|
@ -301,12 +300,12 @@ dag {
|
|||
|
||||
manifest {
|
||||
name = 'nf-core/taxprofiler'
|
||||
author = """nf-core community"""
|
||||
author = """James A. Fellows Yates, Sofia Stamouli, Moritz E. Beber, Lauri Mesilaakso, Thomas A. Christensen II, Jianhong Ou, Mahwash Jamy, Maxime Borry, Rafal Stepien, Tanja Normark"""
|
||||
homePage = 'https://github.com/nf-core/taxprofiler'
|
||||
description = """Taxonomic profiling of shotgun metagenomic data"""
|
||||
description = """Taxonomic classification and profiling of shotgun metagenomic data"""
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=22.10.1'
|
||||
version = '1.0dev'
|
||||
version = '1.0.0'
|
||||
doi = ''
|
||||
}
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@
|
|||
"save_preprocessed_reads": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Save reads from adapter clipping/pair-merging, length filtering for both short and long reads",
|
||||
"description": "Save reads from samples that went through the adapter clipping, pair-merging, and length filtering steps for both short and long reads",
|
||||
"help_text": "This saves the FASTQ output from the following tools:\n\n- fastp\n- AdapterRemoval\n- Porechop\n- Filtlong\n\nThese reads will be a mixture of: adapter clipped, quality trimmed, pair-merged, and length filtered, depending on the parameters you set."
|
||||
}
|
||||
},
|
||||
|
@ -116,7 +116,8 @@
|
|||
"type": "string",
|
||||
"default": "None",
|
||||
"description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).",
|
||||
"help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta"
|
||||
"help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta",
|
||||
"fa_icon": "fas fa-th-list"
|
||||
},
|
||||
"shortread_qc_mergepairs": {
|
||||
"type": "boolean",
|
||||
|
@ -194,7 +195,7 @@
|
|||
"save_complexityfiltered_reads": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Save complexity filtered short-reads",
|
||||
"description": "Save reads from samples that went through the complexity filtering step",
|
||||
"help_text": "Specify whether to save the final complexity filtered reads in your results directory (`--outdir`)."
|
||||
}
|
||||
},
|
||||
|
@ -302,7 +303,7 @@
|
|||
"save_hostremoval_unmapped": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Save unmapped reads in FASTQ format from host removal",
|
||||
"description": "Save reads from samples that went through the host-removal step",
|
||||
"help_text": "Save only the reads NOT mapped to the reference genome in FASTQ format (as exported from `samtools view` and `bam2fq`).\n\nThis can be useful if you wish to perform other analyses on the off-target reads from the host mapping, such as manual profiling or _de novo_ assembly."
|
||||
}
|
||||
},
|
||||
|
@ -323,8 +324,8 @@
|
|||
"save_runmerged_reads": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Save run-concatenated input FASTQ files for each sample",
|
||||
"help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format."
|
||||
"description": "Save reads from samples that went through the run-merging step",
|
||||
"help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> \u26a0\ufe0f Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_<preprocessing_step>` flag.\n\n"
|
||||
}
|
||||
},
|
||||
"fa_icon": "fas fa-clipboard-check"
|
||||
|
@ -427,7 +428,7 @@
|
|||
},
|
||||
"run_bracken": {
|
||||
"type": "boolean",
|
||||
"description": "Post-process kraken2 reports with Bracken.",
|
||||
"description": "Turn on Bracken (and the required Kraken2 prerequisite step).",
|
||||
"fa_icon": "fas fa-toggle-on"
|
||||
},
|
||||
"run_malt": {
|
||||
|
@ -513,34 +514,39 @@
|
|||
"standardisation_taxpasta_format": {
|
||||
"type": "string",
|
||||
"default": "tsv",
|
||||
"fa_icon": "fas fa-file",
|
||||
"fa_icon": "fas fa-pastafarianism",
|
||||
"description": "The desired output format.",
|
||||
"enum": ["tsv", "csv", "arrow", "parquet", "biom"]
|
||||
},
|
||||
"taxpasta_taxonomy_dir": {
|
||||
"type": "string",
|
||||
"description": "The path to a directory containing taxdump files.",
|
||||
"help_text": "This arguments provides the path to the directory containing taxdump files. At least nodes.dmp and names.dmp are required. A merged.dmp file is optional. \n\nModifies tool parameter(s):\n-taxpasta: `--taxpasta_taxonomy_dir`"
|
||||
"help_text": "This argument provides the path to the directory containing taxdump files. At least nodes.dmp and names.dmp are required. A merged.dmp file is optional. \n\nModifies tool parameter(s):\n-taxpasta: `--taxpasta_taxonomy_dir`",
|
||||
"fa_icon": "fas fa-tree"
|
||||
},
|
||||
"taxpasta_add_name": {
|
||||
"type": "boolean",
|
||||
"description": "Add the taxon name to the output.",
|
||||
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon name can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_name`"
|
||||
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon name can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_name`",
|
||||
"fa_icon": "fas fa-tag"
|
||||
},
|
||||
"taxpasta_add_rank": {
|
||||
"type": "boolean",
|
||||
"description": "Add the taxon rank to the output.",
|
||||
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon rank can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_rank`"
|
||||
"help_text": "The standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon rank can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_rank`",
|
||||
"fa_icon": "fas fa-sort-amount-down-alt"
|
||||
},
|
||||
"taxpasta_add_lineage": {
|
||||
"type": "boolean",
|
||||
"description": "Add the taxon's entire lineage to the output.",
|
||||
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon names separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_lineage`\n"
|
||||
"description": "Add the taxon's entire name lineage to the output.",
|
||||
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon names separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_lineage`\n",
|
||||
"fa_icon": "fas fa-link"
|
||||
},
|
||||
"taxpasta_add_idlineage": {
|
||||
"type": "boolean",
|
||||
"description": "Add the taxon's entire lineage to the output.",
|
||||
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon identifiers separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_idlineage`\n"
|
||||
"description": "Add the taxon's entire ID lineage to the output.",
|
||||
"help_text": "\nThe standard output format of taxpasta is a two-column table including the read counts and the integer taxonomic ID. The taxon's entire lineage with the taxon identifiers separated by semi-colons can be added as additional information to the output table.\n\nModifies tool parameter(s):\n- taxpasta: `--taxpasta_add_idlineage`\n",
|
||||
"fa_icon": "fas fa-link"
|
||||
}
|
||||
},
|
||||
"fa_icon": "fas fa-chart-line"
|
||||
|
|
|
@ -28,22 +28,20 @@ workflow DB_CHECK {
|
|||
// Normal checks for within-row validity, so can be moved to separate functions
|
||||
parsed_samplesheet = Channel.fromPath(dbsheet)
|
||||
.splitCsv ( header:true, sep:',' )
|
||||
.map {
|
||||
validate_db_rows(it)
|
||||
create_db_channels(it)
|
||||
.map { row ->
|
||||
validate_db_rows(row)
|
||||
return [ row.subMap(['tool', 'db_name', 'db_params']), file(row.db_path) ]
|
||||
}
|
||||
|
||||
ch_dbs_for_untar = parsed_samplesheet
|
||||
.branch {
|
||||
untar: it[1].toString().endsWith(".tar.gz")
|
||||
.branch { db_meta, db ->
|
||||
untar: db.name.endsWith(".tar.gz")
|
||||
skip: true
|
||||
}
|
||||
|
||||
// Filter the channel to untar only those databases for tools that are selected to be run by the user.
|
||||
ch_input_untar = ch_dbs_for_untar.untar
|
||||
.filter {
|
||||
params["run_${it[0]['tool']}"]
|
||||
}
|
||||
.filter { db_meta, db -> params["run_${db_meta.tool}"] }
|
||||
|
||||
UNTAR (ch_input_untar)
|
||||
ch_versions = ch_versions.mix(UNTAR.out.versions.first())
|
||||
|
@ -54,41 +52,27 @@ workflow DB_CHECK {
|
|||
versions = ch_versions // channel: [ versions.yml ]
|
||||
}
|
||||
|
||||
def validate_db_rows(LinkedHashMap row){
|
||||
def validate_db_rows(LinkedHashMap row) {
|
||||
|
||||
// check minimum number of columns
|
||||
if (row.size() < 4) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed row (e.g. missing column). See documentation for more information. Error in: ${row}"
|
||||
// check minimum number of columns
|
||||
if (row.size() < 4) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed row (e.g. missing column). See documentation for more information. Error in: ${row}"
|
||||
|
||||
// all columns there
|
||||
def expected_headers = ['tool', 'db_name', 'db_params', 'db_path']
|
||||
if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_keys.join(", ")}"
|
||||
// all columns there
|
||||
def expected_headers = ['tool', 'db_name', 'db_params', 'db_path']
|
||||
if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_headers.join(", ")}"
|
||||
|
||||
// valid tools specified// TIFNISIH LIST
|
||||
def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ]
|
||||
if ( !expected_tools.contains(row.tool) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid tool name. Please see documentation for all supported profilers. Error in: ${row}"
|
||||
// valid tools specified
|
||||
def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ]
|
||||
if ( !expected_tools.contains(row.tool) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid tool name. Please see documentation for all supported profilers. Error in: ${row}"
|
||||
|
||||
// detect quotes in params
|
||||
if ( row.db_params.contains('"') ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
|
||||
if ( row.db_params.contains("'") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
|
||||
// detect quotes in params
|
||||
if ( row.db_params.contains('"') ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
|
||||
if ( row.db_params.contains("'") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}"
|
||||
|
||||
// check if any form of bracken params, that it must have `;`
|
||||
if ( row.tool == 'bracken' && row.db_params && !row.db_params.contains(";") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. Bracken requires a semi-colon if passing parameter. Error in: ${row}"
|
||||
// check if any form of bracken params, that it must have `;`
|
||||
if ( row.tool == 'bracken' && row.db_params && !row.db_params.contains(";") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. Bracken requires a semi-colon if passing parameter. Error in: ${row}"
|
||||
|
||||
// ensure that the database directory exists
|
||||
if (!file(row.db_path, type: 'dir').exists()) exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}"
|
||||
|
||||
}
|
||||
|
||||
def create_db_channels(LinkedHashMap row) {
|
||||
def meta = [:]
|
||||
meta.tool = row.tool
|
||||
meta.db_name = row.db_name
|
||||
meta.db_params = row.db_params
|
||||
|
||||
def array = []
|
||||
if (!file(row.db_path, type: 'dir').exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}"
|
||||
}
|
||||
array = [ meta, file(row.db_path) ]
|
||||
|
||||
return array
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -12,9 +12,9 @@ workflow INPUT_CHECK {
|
|||
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
|
||||
.csv
|
||||
.splitCsv ( header:true, sep:',' )
|
||||
.branch {
|
||||
fasta: it['fasta'] != ''
|
||||
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
|
||||
.branch { row ->
|
||||
fasta: row.fasta != ''
|
||||
nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
|
||||
fastq: true
|
||||
}
|
||||
|
||||
|
@ -37,49 +37,42 @@ workflow INPUT_CHECK {
|
|||
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
|
||||
def create_fastq_channel(LinkedHashMap row) {
|
||||
// create meta map
|
||||
def meta = [:]
|
||||
meta.id = row.sample
|
||||
meta.run_accession = row.run_accession
|
||||
meta.instrument_platform = row.instrument_platform
|
||||
meta.single_end = row.single_end.toBoolean()
|
||||
meta.is_fasta = false
|
||||
def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
|
||||
meta.id = meta.sample
|
||||
meta.single_end = row.single_end.toBoolean()
|
||||
meta.is_fasta = false
|
||||
|
||||
// add path(s) of the fastq file(s) to the meta map
|
||||
def fastq_meta = []
|
||||
if (!file(row.fastq_1).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
|
||||
}
|
||||
|
||||
if (meta.single_end) {
|
||||
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
|
||||
return [ meta, [ file(row.fastq_1) ] ]
|
||||
} else {
|
||||
if (meta.instrument_platform == 'OXFORD_NANOPORE') {
|
||||
if (row.fastq_2 != '') {
|
||||
exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
|
||||
return [ meta, [ file(row.fastq_1) ] ]
|
||||
} else {
|
||||
if (!file(row.fastq_2).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
return [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
}
|
||||
|
||||
}
|
||||
return fastq_meta
|
||||
}// Function to get list of [ meta, fasta ]
|
||||
def create_fasta_channel(LinkedHashMap row) {
|
||||
def meta = [:]
|
||||
meta.id = row.sample
|
||||
meta.run_accession = row.run_accession
|
||||
meta.instrument_platform = row.instrument_platform
|
||||
meta.single_end = true
|
||||
meta.is_fasta = true
|
||||
}
|
||||
|
||||
// Function to get list of [ meta, fasta ]
|
||||
def create_fasta_channel(LinkedHashMap row) {
|
||||
def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
|
||||
meta.id = meta.sample
|
||||
meta.single_end = true
|
||||
meta.is_fasta = true
|
||||
|
||||
def array = []
|
||||
if (!file(row.fasta).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
|
||||
}
|
||||
array = [ meta, [ file(row.fasta) ] ]
|
||||
|
||||
return array
|
||||
return [ meta, [ file(row.fasta) ] ]
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ workflow LONGREAD_HOSTREMOVAL {
|
|||
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
|
||||
|
||||
bam_bai = MINIMAP2_ALIGN.out.bam
|
||||
.join(SAMTOOLS_INDEX.out.bai, remainder: true)
|
||||
.join(SAMTOOLS_INDEX.out.bai)
|
||||
|
||||
SAMTOOLS_STATS ( bam_bai, reference )
|
||||
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
|
||||
|
|
|
@ -20,33 +20,23 @@ workflow LONGREAD_PREPROCESSING {
|
|||
PORECHOP_PORECHOP ( reads )
|
||||
|
||||
ch_processed_reads = PORECHOP_PORECHOP.out.reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
.map { meta, reads -> [ meta + [single_end: 1], reads ] }
|
||||
|
||||
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
|
||||
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
|
||||
|
||||
} else if ( params.longread_qc_skipadaptertrim && !params.longread_qc_skipqualityfilter) {
|
||||
|
||||
ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
|
||||
ch_processed_reads = FILTLONG ( reads.map { meta, reads -> [meta, [], reads ] } )
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
|
||||
|
||||
} else {
|
||||
PORECHOP_PORECHOP ( reads )
|
||||
ch_clipped_reads = PORECHOP_PORECHOP.out.reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
.map { meta, reads -> [ meta + [single_end: 1], reads ] }
|
||||
|
||||
ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
|
||||
ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads
|
||||
|
||||
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
|
||||
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
|
||||
|
|
|
@ -2,19 +2,19 @@
|
|||
// Run profiling
|
||||
//
|
||||
|
||||
include { MALT_RUN } from '../../modules/nf-core/malt/run/main'
|
||||
include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main'
|
||||
include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report'
|
||||
include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main'
|
||||
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main'
|
||||
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main'
|
||||
include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
|
||||
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
|
||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
|
||||
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
|
||||
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
|
||||
include { MALT_RUN } from '../../modules/nf-core/malt/run/main'
|
||||
include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main'
|
||||
include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report'
|
||||
include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main'
|
||||
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main'
|
||||
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main'
|
||||
include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
|
||||
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
|
||||
include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_SINGLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
|
||||
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
|
||||
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
|
||||
|
||||
workflow PROFILING {
|
||||
take:
|
||||
|
@ -35,10 +35,7 @@ workflow PROFILING {
|
|||
ch_input_for_profiling = reads
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
pairtype = meta_new['single_end'] ? '_se' : '_pe'
|
||||
meta_new['id'] = meta_new['id'] + pairtype
|
||||
[meta_new, reads]
|
||||
[meta + [id: "${meta.id}${meta.single_end ? '_se' : '_pe'}"], reads]
|
||||
}
|
||||
.combine(databases)
|
||||
.branch {
|
||||
|
@ -68,34 +65,34 @@ workflow PROFILING {
|
|||
// MALT: We groupTuple to have all samples in one channel for MALT as database
|
||||
// loading takes a long time, so we only want to run it once per database
|
||||
ch_input_for_malt = ch_input_for_profiling.malt
|
||||
.map {
|
||||
meta, reads, db_meta, db ->
|
||||
.map {
|
||||
meta, reads, db_meta, db ->
|
||||
|
||||
// Reset entire input meta for MALT to just database name,
|
||||
// as we don't run on a per-sample basis due to huge databases
|
||||
// so all samples are in one run and so sample-specific metadata
|
||||
// unnecessary. Set as database name to prevent `null` job ID and prefix.
|
||||
def temp_meta = [ id: meta['db_name'] ]
|
||||
// Reset entire input meta for MALT to just database name,
|
||||
// as we don't run on a per-sample basis due to huge databases
|
||||
// so all samples are in one run and so sample-specific metadata
|
||||
// unnecessary. Set as database name to prevent `null` job ID and prefix.
|
||||
def temp_meta = [ id: meta['db_name'] ]
|
||||
|
||||
// Extend database parameters to specify whether to save alignments or not
|
||||
def new_db_meta = db_meta.clone()
|
||||
def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
|
||||
new_db_meta['db_params'] = db_meta['db_params'] + sam_format
|
||||
// Extend database parameters to specify whether to save alignments or not
|
||||
def new_db_meta = db_meta.clone()
|
||||
def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
|
||||
new_db_meta['db_params'] = db_meta['db_params'] + sam_format
|
||||
|
||||
// Combine reduced sample metadata with updated database parameters metadata,
|
||||
// make sure id is db_name for publishing purposes.
|
||||
def new_meta = temp_meta + new_db_meta
|
||||
new_meta['id'] = new_meta['db_name']
|
||||
// Combine reduced sample metadata with updated database parameters metadata,
|
||||
// make sure id is db_name for publishing purposes.
|
||||
def new_meta = temp_meta + new_db_meta
|
||||
new_meta['id'] = new_meta['db_name']
|
||||
|
||||
[ new_meta, reads, db ]
|
||||
[ new_meta, reads, db ]
|
||||
|
||||
}
|
||||
.groupTuple(by: [0,2])
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0], it[1].flatten() ]
|
||||
db: it[2]
|
||||
}
|
||||
}
|
||||
.groupTuple(by: [0,2])
|
||||
.multiMap {
|
||||
meta, reads, db ->
|
||||
reads: [ meta, reads.flatten() ]
|
||||
db: db
|
||||
}
|
||||
|
||||
MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db )
|
||||
|
||||
|
@ -120,12 +117,11 @@ workflow PROFILING {
|
|||
|
||||
}
|
||||
|
||||
if ( params.run_kraken2 ) {
|
||||
if ( params.run_kraken2 || params.run_bracken ) {
|
||||
// Have to pick first element of db_params if using bracken,
|
||||
// as db sheet for bracken must have ; sep list to
|
||||
// distinguish between kraken and bracken parameters
|
||||
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
||||
.dump(tag: "ch_input_for_kraken2_b4")
|
||||
.map {
|
||||
meta, reads, db_meta, db ->
|
||||
def db_meta_new = db_meta.clone()
|
||||
|
@ -272,10 +268,10 @@ workflow PROFILING {
|
|||
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
|
||||
|
||||
KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
KAIJU_KAIJU2TABLE_SINGLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_SINGLE.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE_SINGLE.out.summary )
|
||||
}
|
||||
|
||||
if ( params.run_diamond ) {
|
||||
|
@ -329,7 +325,7 @@ workflow PROFILING {
|
|||
reads: [ single_meta + db_meta, reads.flatten() ]
|
||||
db: db
|
||||
}
|
||||
// Hardcode to _always_ produce the report file (which is our basic otput, and goes into)
|
||||
// Hardcode to _always_ produce the report file (which is our basic output, and goes into)
|
||||
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads, ch_input_for_krakenuniq.db, params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
|
||||
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
//
|
||||
|
||||
include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
|
||||
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||
include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_COMBINED } from '../../modules/nf-core/kaiju/kaiju2table/main'
|
||||
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'
|
||||
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
|
||||
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
|
||||
|
@ -103,9 +103,9 @@ workflow STANDARDISATION_PROFILES {
|
|||
[[id:it[0]], it[1]]
|
||||
}
|
||||
|
||||
KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
|
||||
KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary )
|
||||
ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions )
|
||||
|
||||
// Kraken2
|
||||
|
||||
|
@ -151,7 +151,7 @@ workflow STANDARDISATION_PROFILES {
|
|||
|
||||
MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version )
|
||||
ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )
|
||||
|
||||
|
||||
emit:
|
||||
taxpasta = TAXPASTA_MERGE.out.merged_profiles
|
||||
versions = ch_versions
|
||||
|
|
|
@ -9,13 +9,13 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
|
|||
// Validate input parameters
|
||||
WorkflowTaxprofiler.initialise(params, log)
|
||||
|
||||
// TODO nf-core: Add all file path parameters for the pipeline to the list below
|
||||
// Check input path parameters to see if they exist
|
||||
def checkPathParamList = [ params.input, params.genome, params.databases,
|
||||
params.outdir, params.longread_hostremoval_index,
|
||||
params.hostremoval_reference, params.shortread_hostremoval_index,
|
||||
params.multiqc_config, params.shortread_qc_adapterlist,
|
||||
params.krona_taxonomy_directory,
|
||||
params.taxpasta_taxonomy_dir,
|
||||
params.multiqc_logo, params.multiqc_methods_description
|
||||
]
|
||||
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
|
||||
|
@ -301,7 +301,6 @@ workflow TAXPROFILER {
|
|||
ch_multiqc_files = ch_multiqc_files.mix( STANDARDISATION_PROFILES.out.mqc.collect{it[1]}.ifEmpty([]) )
|
||||
}
|
||||
|
||||
// TODO create multiQC module for metaphlan
|
||||
MULTIQC (
|
||||
ch_multiqc_files.collect(),
|
||||
ch_multiqc_config.toList(),
|
||||
|
|
Loading…
Reference in a new issue