1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 05:32:05 +00:00

Merge branch 'dev' into update_usage

This commit is contained in:
sofstam 2022-12-09 10:35:12 +01:00
commit 3af71d9c63
47 changed files with 1536 additions and 193 deletions

View file

@ -18,18 +18,20 @@ jobs:
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
NXF_VER:
- "21.10.3"
- "latest-everything"
parameters:
- "--preprocessing_qc_tool falco"
- "--perform_longread_qc false"
- "--perform_shortread_qc false"
- "--shortread_qc_tool fastp"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_includeunmerged"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
- "--shortread_qc_tool adapterremoval"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_includeunmerged"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
- "--shortread_complexityfilter_tool bbduk"
- "--shortread_complexityfilter_tool prinseqplusplus"
@ -99,3 +101,69 @@ jobs:
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
attempt_limit: 3
krakenuniq:
  # CI job that runs the pipeline with the dedicated `test_krakenuniq` profile.
  name: Test KrakenUniq with workflow parameters
  # Run for every non-push event; for pushes, only in the nf-core/taxprofiler repository.
  if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
  runs-on: ubuntu-latest
  strategy:
    # Keep the other Nextflow version running if one matrix entry fails,
    # matching the main test matrix in this workflow.
    fail-fast: false
    matrix:
      # Quoted so the versions stay strings.
      NXF_VER:
        - "21.10.3"
        - "latest-everything"
  steps:
    - name: Check out pipeline code
      uses: actions/checkout@v2

    - name: Install Nextflow
      uses: nf-core/setup-nextflow@v1
      with:
        version: "${{ matrix.NXF_VER }}"

    - name: Show current locale
      run: locale

    - name: Set UTF-8 enabled locale
      run: |
        sudo locale-gen en_US.UTF-8
        sudo update-locale LANG=en_US.UTF-8

    # The pipeline run is wrapped in a retry action with up to 3 attempts.
    - name: Run pipeline with test data
      uses: Wandalen/wretry.action@v1.0.11
      with:
        command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results
        attempt_limit: 3
malt:
  # CI job that runs the pipeline with the `test_nothing` profile plus `--run_malt`,
  # so MALT is the only profiler switched on from the command line.
  name: Test MALT with workflow parameters
  # Run for every non-push event; for pushes, only in the nf-core/taxprofiler repository.
  if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
  runs-on: ubuntu-latest
  strategy:
    # Keep the other Nextflow version running if one matrix entry fails,
    # matching the main test matrix in this workflow.
    fail-fast: false
    matrix:
      # Quoted so the versions stay strings.
      NXF_VER:
        - "21.10.3"
        - "latest-everything"
  steps:
    - name: Check out pipeline code
      uses: actions/checkout@v2

    - name: Install Nextflow
      uses: nf-core/setup-nextflow@v1
      with:
        version: "${{ matrix.NXF_VER }}"

    - name: Show current locale
      run: locale

    - name: Set UTF-8 enabled locale
      run: |
        sudo locale-gen en_US.UTF-8
        sudo update-locale LANG=en_US.UTF-8

    # The pipeline run is wrapped in a retry action with up to 3 attempts.
    - name: Run pipeline with test data
      uses: Wandalen/wretry.action@v1.0.11
      with:
        command: nextflow run ${GITHUB_WORKSPACE} -profile test_nothing,docker --run_malt --outdir ./results
        attempt_limit: 3

View file

@ -36,6 +36,14 @@
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0)
> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0.
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
@ -62,6 +70,10 @@
- [FILTLONG](https://github.com/rrwick/Filtlong)
- [falco](https://doi.org/10.12688/f1000research.21142.2)
> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com)

View file

@ -30,13 +30,14 @@ On release, automated continuous integration tests run the pipeline on a full-si
![](docs/images/taxprofiler_tube.png)
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (short read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity filtering ([bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus))
- Host read removal ([BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/))
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Performs taxonomic profiling using one or more of:
3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
4. Performs taxonomic profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/)
- [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
@ -44,15 +45,16 @@ On release, automated continuous integration tests run the pipeline on a full-si
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
- [Kaiju](https://kaiju.binf.ku.dk/)
- [mOTUs](https://motu-tool.org/)
- [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
4. Perform optional post-processing with:
- [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
5. Performs optional post-processing with:
- [bracken](https://ccb.jhu.edu/software/bracken/)
5. Standardises output tables
6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
6. Standardises output tables
7. Presents QC for raw reads ([`MultiQC`](http://multiqc.info/))
8. Plots Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
## Quick Start
1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`)
1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`).
2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
@ -87,7 +89,7 @@ nf-core/taxprofiler was originally written by nf-core community.
We thank the following people for their extensive assistance in the development of this pipeline:
[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofsam), [Maxime Borry](https://github.com/maxibor).
[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofstam), [Maxime Borry](https://github.com/maxibor), [Thomas A. Christensen II](https://github.com/MillironX), [Jianhong Ou](https://github.com/jianhong), [Rafal Stepien](https://github.com/rafalstepien), [Mahwash Jamy](https://github.com/mjamy).
## Contributions and Support

View file

@ -21,6 +21,7 @@ run_modules:
- adapterRemoval
- fastp
- bowtie2
- samtools
- kraken
- malt
- custom_content

View file

@ -40,11 +40,29 @@ process {
]
}
withName: FALCO {
    // Publish only the HTML and text files, under <outdir>/falco/raw.
    publishDir = [
        pattern: '*.{html,txt}',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/falco/raw" }
    ]

    // Output prefix: sample ID plus run accession, suffixed with "_raw".
    ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
}
withName: FALCO_PROCESSED {
    // Publish only the HTML and text files, under <outdir>/falco/processed.
    publishDir = [
        pattern: '*.{html,txt}',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/falco/processed" }
    ]

    // Output prefix: sample ID plus run accession, suffixed with "_processed".
    ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
}
withName: FASTP_SINGLE {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -61,11 +79,11 @@ process {
withName: FASTP_PAIRED {
ext.args = [
// collapsing options - option to retain singletons
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
params.shortread_qc_includeunmerged ? '--include_unmerged' : '',
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -82,8 +100,7 @@ process {
withName: ADAPTERREMOVAL_SINGLE {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()
@ -101,9 +118,8 @@ process {
// collapsing options
params.shortread_qc_mergepairs ? "--collapse" : "",
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level
params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()
@ -116,7 +132,7 @@ process {
]
}
withName: PORECHOP {
withName: PORECHOP_PORECHOP {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/porechop" },
@ -215,6 +231,15 @@ process {
]
}
withName: SAMTOOLS_STATS {
    // Publish files matching '*stats' under <outdir>/samtools/stats.
    publishDir = [
        pattern: '*stats',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/samtools/stats" }
    ]

    // Output prefix: sample ID plus run accession.
    ext.prefix = { "${meta.id}_${meta.run_accession}" }
}
withName: BBMAP_BBDUK {
ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}",
@ -256,7 +281,7 @@ process {
}
withName: MALT_RUN {
ext.args = { "${meta.db_params}" }
ext.args = { "${meta.db_params} -m ${params.malt_mode}" }
// one run with multiple samples, so fix ID to just db name to ensure clean log name
ext.prefix = { "${meta.db_name}" }
publishDir = [
@ -277,7 +302,7 @@ process {
}
withName: KRAKEN2_KRAKEN2 {
ext.args = { "${meta.db_params}" }
ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}/" },
@ -286,6 +311,16 @@ process {
]
}
withName: BRACKEN_BRACKEN {
    // Task errors in this process are ignored rather than failing the run.
    errorStrategy = 'ignore'

    // With run merging enabled the prefix is <id>-<db>; otherwise the run
    // accession is included as well: <id>-<run_accession>-<db>.
    ext.prefix = params.perform_runmerging
        ? { "${meta.id}-${meta.db_name}" }
        : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }

    // Publish the TSV outputs in a per-database directory under <outdir>/bracken.
    publishDir = [
        pattern: '*.tsv',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/bracken/${meta.db_name}/" }
    ]
}
withName: KRAKENTOOLS_COMBINEKREPORTS {
ext.prefix = { "kraken2_${meta.id}_combined_reports" }
publishDir = [
@ -295,6 +330,17 @@ process {
]
}
withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
    // Tool arguments come straight from the per-database parameters.
    ext.args = { "${meta.db_params}" }

    // Multiple samples are handled in a single run, so the prefix is fixed to
    // the database name alone to keep the log name clean.
    ext.prefix = { "${meta.db_name}" }

    // Publish text, report and gzipped FASTQ outputs in a per-database
    // directory under <outdir>/krakenuniq.
    publishDir = [
        pattern: '*.{txt,report,fastq.gz}',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/krakenuniq/${meta.db_name}/" }
    ]
}
withName: KRONA_CLEANUP {
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
@ -411,6 +457,13 @@ process {
}
withName: MOTUS_PROFILE {
ext.args = {
[
params.motus_remove_ncbi_ids ? "" : "-p",
params.motus_use_relative_abundance ? "" : "-c",
params.motus_save_mgc_read_counts ? "-M ${task.ext.prefix}.mgc" : ""
].join(',').replaceAll(','," ")
}
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/motus/${meta.db_name}/" },

View file

@ -34,10 +34,12 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_malt = true
run_bracken = true
run_malt = false
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_krakenuniq = true
run_motus = false
run_krona = true
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
@ -50,6 +52,7 @@ params {
process {
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: MEGAN_RMA2INFO_TSV {
maxForks = 1

View file

@ -0,0 +1,72 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
//
// Separate test as KrakenUniq database can sometimes be too big for GHA
//
params {
    // Profile identification
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test to check KrakenUniq function'

    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Input data: sample sheet and KrakenUniq database sheet from nf-core/test-datasets
    input     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv'

    // Pre-processing steps enabled for this test
    perform_shortread_qc               = true
    perform_longread_qc                = true
    shortread_qc_mergepairs            = true
    perform_shortread_complexityfilter = true
    perform_shortread_hostremoval      = true
    perform_longread_hostremoval       = true
    perform_runmerging                 = true
    hostremoval_reference              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'

    // Profilers: only KrakenUniq is switched on
    run_kaiju      = false
    run_kraken2    = false
    run_bracken    = false
    run_malt       = false
    run_metaphlan3 = false
    run_centrifuge = false
    run_diamond    = false
    run_krakenuniq = true
    run_motus      = false

    // Krona plotting
    run_krona                = true
    krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'

    // NOTE(review): these flags belong to profilers disabled above, so they
    // are presumably inert in this profile; confirm before removing.
    malt_save_reads       = true
    kraken2_save_reads    = true
    centrifuge_save_reads = true
    diamond_save_reads    = true
}
process {
    // Allow at most one parallel task for each of the MALT/MEGAN processes.
    withName: MALT_RUN {
        maxForks = 1
    }

    withName: MEGAN_RMA2INFO_TSV {
        maxForks = 1
    }

    withName: MEGAN_RMA2INFO_KRONA {
        maxForks = 1
    }

    // Both eido steps receive the same '--st-index sample' argument.
    withName: 'EIDO_VALIDATE' {
        ext.args = '--st-index sample'
    }

    withName: 'EIDO_CONVERT' {
        ext.args = '--st-index sample'
    }
}

View file

@ -10,6 +10,10 @@
----------------------------------------------------------------------------------------
*/
//
// Separate test as mOTUs database download can be flaky
//
params {
config_profile_name = 'mOTUs Test profile'
config_profile_description = 'Minimal test to check mOTUs function'
@ -33,10 +37,15 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = true
motus_save_mgc_read_counts = false
motus_remove_ncbi_ids = false
motus_use_relative_abundance = false
run_profile_standardisation = true
}

View file

@ -33,10 +33,12 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_krakenuniq = true
run_motus = false
run_krona = true
}
@ -44,5 +46,6 @@ params {
// Process-level overrides applied by this test profile.
process {
withName: MALT_RUN {
// Allow at most one MALT_RUN task to execute in parallel.
maxForks = 1
// Pass the configured MALT mode plus a fixed -J-Xmx12G flag.
// NOTE(review): -J-Xmx12G presumably sets a 12 GB JVM heap limit; confirm against the MALT launcher.
// NOTE(review): this argument string does not include ${meta.db_params}; confirm that is intended here.
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
}

View file

@ -34,10 +34,12 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
}

View file

@ -33,15 +33,18 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_bracken = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
}
// Process-level overrides applied by this test profile.
process {
withName: MALT_RUN {
// Allow at most one MALT_RUN task to execute in parallel.
maxForks = 1
// Pass the configured MALT mode plus a fixed -J-Xmx12G flag.
// NOTE(review): -J-Xmx12G presumably sets a 12 GB JVM heap limit; confirm against the MALT launcher.
// NOTE(review): this argument string does not include ${meta.db_params}; confirm that is intended here.
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
}

View file

@ -19,6 +19,7 @@ params {
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
run_malt = true
run_metaphlan3 = true
run_centrifuge = true
@ -36,6 +37,7 @@ params {
process {
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: MEGAN_RMA2INFO {
maxForks = 1

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 648 KiB

After

Width:  |  Height:  |  Size: 690 KiB

View file

@ -7,7 +7,7 @@
viewBox="0 0 555.62502 211.66668"
version="1.1"
id="svg5"
inkscape:version="1.2 (1:1.2.1+202207142221+cd75a1ee6d)"
inkscape:version="1.2.1 (1:1.2.1+202210291243+9c6d41e410)"
sodipodi:docname="taxprofiler_tube.svg"
xml:space="preserve"
inkscape:export-filename="taxprofiler_tube.png"
@ -26,7 +26,7 @@
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="true"
inkscape:document-units="mm"
showgrid="true"
showgrid="false"
inkscape:snap-bbox="true"
inkscape:bbox-nodes="true"
inkscape:snap-bbox-edge-midpoints="false"
@ -36,11 +36,11 @@
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:zoom="0.71542514"
inkscape:cx="808.61011"
inkscape:cy="440.29764"
inkscape:zoom="1.0117639"
inkscape:cx="596.48301"
inkscape:cy="463.05268"
inkscape:window-width="1920"
inkscape:window-height="1043"
inkscape:window-height="1016"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
@ -2184,8 +2184,8 @@
id="rect2500"
width="555.625"
height="211.66667"
x="-793.74982"
y="42.333302"
x="-793.74988"
y="42.333309"
ry="0" /><g
id="g10937-1-7-9-4-2"
transform="rotate(180,-329.27382,134.93748)"><path
@ -2334,8 +2334,12 @@
style="fill:#999999;stroke:#999999;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -619.38943,142.87497 v 68.79167"
id="path7567" /><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -519.90607,108.47916 -3e-5,2.64581 c -6e-5,5.30683 2.50272,7.79439 5.29167,10.58334"
id="path1232-36-5-9-3-9-3-6-7-5-2"
sodipodi:nodetypes="csc" /><path
style="fill:#999999;stroke:#999999;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
d="M -534.45818,68.791637 V 211.66664"
d="m -505.08943,71.43747 v 142.875"
id="path7571" /><path
style="fill:#999999;stroke:#999999;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
d="m -465.9311,47.62497 0,164.04167"
@ -2374,10 +2378,6 @@
d="m -673.36444,169.3333 c 0,9.86265 10e-6,16.56418 0,21.16665 -1e-5,5.30683 2.50272,7.79439 5.29167,10.58334 2.64583,2.64584 7.9375,5.29167 10.58333,5.29167 l 19.84376,0"
id="path1232-36-5-9-3-9-3-3"
sodipodi:nodetypes="csssc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -500.06235,157.42705 c 0,-9.86265 10e-6,1.95665 0,-2.64582 -1e-5,-5.30683 2.50272,-7.79439 5.29167,-10.58334 2.64583,-2.64584 7.9375,-5.29167 10.58333,-5.29167 h 19.84376"
id="path1232-36-5-9-3-9-3-3-9"
sodipodi:nodetypes="csssc" /><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -634.99985,169.3333 c 0,9.86265 1e-5,16.56418 0,21.16665 -10e-6,5.30683 2.50271,7.79439 5.29166,10.58334 2.64584,2.64584 7.9375,5.29167 10.58334,5.29167 h 5.29166"
id="path1232-36-5-9-3-9-3-3-5"
@ -2403,11 +2403,11 @@
id="path1232-36-5-9-3-9-3-6-7-5"
sodipodi:nodetypes="csssc" /><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -651.9332,126.99999 h 111.125 c 3.94417,0 7.79437,2.50272 10.58332,5.29167 2.64585,2.64583 5.32325,6.63932 5.29169,10.58336 l -0.26461,33.07289 -0.26451,41.01043 c 0,3.94418 2.64584,7.93753 5.29169,10.58336 2.78895,2.78895 6.63915,5.29167 10.58332,5.29167 l 64.0291,-4e-5"
d="m -656.16652,126.99999 h 141.81667 c 3.94417,0 7.79437,2.50272 10.58332,5.29167 2.64585,2.64583 5.32325,6.63932 5.29169,10.58336 l -0.26461,33.07289 -0.26451,41.01043 c 0,3.94418 2.64584,7.93753 5.29169,10.58336 2.78895,2.78895 6.63915,5.29167 10.58332,5.29167 l 39.0626,-4e-5"
id="path1232-36-5-9-3-9-8"
sodipodi:nodetypes="cssscsssc" /><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -636.32277,206.37497 97.63125,0"
d="m -637.64568,206.37497 119.0625,0"
id="path1232-36-5-9-3-9-8-6"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.988235"
@ -2561,7 +2561,7 @@
id="path2500-5-22-4-1"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#1a6aa7;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -596.37069,108.47916 h 66.14582"
d="m -596.37069,108.47916 77.78751,0"
id="path2500-5-22-4-1-22"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
@ -2569,7 +2569,7 @@
id="path2500-5-22-4-7"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -686.32906,193.14583 h 142.875"
d="m -686.32906,193.14583 h 152.6978"
id="path2500-5-22-4-7-8"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#0b6e4f;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
@ -2631,7 +2631,8 @@
id="path1232-7-2-7-9-7"
sodipodi:nodetypes="csssscc" /><g
id="g10758"
style="stroke:#1a6aa7;stroke-opacity:1"><path
style="stroke:#1a6aa7;stroke-opacity:1"
transform="translate(10.583333)"><path
style="fill:none;stroke:#1a6aa7;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -530.75402,108.47916 h 8.46666 c 3.94417,0 7.79437,2.50273 10.58333,5.29167 l 4.23335,4.23331"
id="path1232-7-2-7-9"
@ -2672,6 +2673,10 @@
d="m -441.17453,169.74835 -22.9045,-22.90461"
id="path2500-5-6-0-3-9-2"
sodipodi:nodetypes="cc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -468.31237,140.22916 -2.64581,-3e-5 c -5.30683,-6e-5 -7.79439,2.50272 -10.58334,5.29167 -2.64584,2.64583 -5.29168,7.9375 -5.29167,10.58333 l 10e-6,2.64583"
id="path1232-36-5-9-3-9-3-6-7-5-9"
sodipodi:nodetypes="csssc" /><path
style="fill:none;stroke:#0b6e4f;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -485.24583,138.90621 h 7.93748 c 3.94418,0 7.79431,2.5028 10.58334,5.2917 l 3.96891,3.96873"
id="path1232-7-9-8-4"
@ -2711,25 +2716,15 @@
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 113.57714,162.51554 97.287094,178.54103"
id="path2500-5-6-0-2-2"
sodipodi:nodetypes="cc" /></g><g
id="g59518-7-7-7"
transform="matrix(0,-1,-1,0,-358.97799,284.23333)"><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 91.087511,186.0635 v -7.93749 c 0,-3.94417 2.50272,-7.79437 5.291655,-10.58333 l 2.645843,-2.64584"
id="path1232-7-2-3-4-9"
sodipodi:nodetypes="cssc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 145.3271,127.85519 0,22.48955 c 0,3.94417 -2.50272,7.79437 -5.29166,10.58333 l -2.64584,2.64584"
id="path1232-7-2-3-4-9-4"
sodipodi:nodetypes="cssc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 130.77503,141.08433 -2.64584,3e-5 c -3.94417,4e-5 -7.79437,2.50272 -10.58333,5.29166 l -2.64584,2.64584"
id="path1232-7-2-7-61-5-2"
sodipodi:nodetypes="cssc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 114.90001,149.02186 -15.875001,15.875"
id="path2500-5-6-0-2-2-0"
sodipodi:nodetypes="cc" /></g><text
sodipodi:nodetypes="cc" /></g><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -519.9061,144.1979 v 7.93749 c 0,3.94417 2.50272,7.79437 5.29165,10.58333 l 2.64585,2.64584 5.09793,4.43647 2.64585,2.64584 c 2.78893,2.78896 5.29165,6.63916 5.29165,10.58333 v 7.93749"
id="path1232-7-2-3-4-9"
sodipodi:nodetypes="cssccssc" /><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -486.83318,138.90623 h -22.48955 c -3.94417,0 -7.79437,2.50272 -10.58333,5.29166 l -2.64584,2.64584"
id="path1232-7-2-3-4-9-4"
sodipodi:nodetypes="cssc" /><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;text-align:center;letter-spacing:0px;text-anchor:middle;fill:#000000;stroke-width:0.264583"
x="-656.75073"
@ -2762,29 +2757,29 @@
id="tspan26367">FILTERING)</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;fill:#000000;stroke-width:0.264583"
x="-554.07758"
x="-538.20251"
y="71.856567"
id="text9610-5-6"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;fill:#000000;stroke-width:0.264583"
x="-554.07758"
x="-538.20251"
y="71.856567"
id="tspan26367-2">(HOST </tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;fill:#000000;stroke-width:0.264583"
x="-554.07758"
y="77.191772"
id="tspan10108">REMOVAL)</tspan></text><text
id="tspan10108"
sodipodi:role="line">(HOST REMOVAL)</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;fill:#000000;stroke-width:0.264583"
x="-501.1207"
x="-485.24573"
y="71.856567"
id="text9610-5-6-4"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;fill:#000000;stroke-width:0.264583"
x="-501.1207"
x="-485.24573"
y="71.856567"
id="tspan10110">(RUN MERGING)</tspan></text><text
id="tspan10110">(RUN </tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;fill:#000000;stroke-width:0.264583"
x="-485.24573"
y="77.191772"
id="tspan2871">MERGING)</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;fill:#000000;stroke-width:0.264583"
x="-445.03543"
@ -2944,13 +2939,43 @@
id="tspan35786">Bowtie2</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-493.10275"
y="134.8528"
x="-519.92303"
y="96.661186"
id="text15609-36-5-0"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-519.92303"
y="96.661186"
id="tspan35786-62">samtools</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-519.92303"
y="101.99639"
id="tspan2830">stats</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-523.45795"
y="136.0361"
id="text15609-36-5-0-6"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:end;text-anchor:end;stroke-width:0.264583"
x="-523.45795"
y="136.0361"
id="tspan35786-62-1">samtools</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:end;text-anchor:end;stroke-width:0.264583"
x="-523.45795"
y="141.37131"
id="tspan2830-8">stats</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-481.27704"
y="134.60205"
id="text15609-36-5-2"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-493.10275"
y="134.8528"
x="-481.27704"
y="134.60205"
id="tspan35786-7">cat</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
@ -3101,33 +3126,63 @@
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-746.1651"
y="132.20062"
y="126.90896"
id="text15609-36-6"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-746.1651"
y="132.20062"
id="tspan15611-0-4">FastQC</tspan></text><text
y="126.90896"
id="tspan15611-0-4">FastQC</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-746.1651"
y="132.24416"
id="tspan1305">falco</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-746.1651"
y="152.30898"
id="text15609-36-6-3"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-746.1651"
y="152.30898"
id="tspan15611-0-4-6">FastQC</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-746.1651"
y="157.64418"
id="tspan1305-7">falco</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-634.78503"
y="101.77354"
y="96.481873"
id="text15609-36-6-5"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-634.78503"
y="101.77354"
id="tspan15611-0-4-9">FastQC</tspan></text><text
y="96.481873"
id="tspan15611-0-4-9">FastQC</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-634.78503"
y="101.81708"
id="tspan1307">falco</tspan></text><text
xml:space="preserve"
style="font-size:4.23333px;line-height:1.25;font-family:Commissioner;-inkscape-font-specification:Commissioner;letter-spacing:0px;stroke-width:0.264583"
x="-635.0401"
y="162.07314"
y="156.78146"
id="text15609-36-6-5-6"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-635.0401"
y="162.07314"
id="tspan15611-0-4-9-7">FastQC</tspan></text><path
y="156.78146"
id="tspan15611-0-4-9-7">FastQC</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="-635.0401"
y="162.11667"
id="tspan1309">falco</tspan></text><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -653.52069,109.80206 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461" /><path
@ -3141,23 +3196,38 @@
d="m -653.52069,128.32289 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-22" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -653.52069,170.65622 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-61" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -653.52069,207.69791 2.64583,-1.32292 -2.64583,-1.32291 z"
id="path1461-22-2" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -615.1561,128.32288 2.64583,-1.32291 -2.64583,-1.32292 z"
id="path1461-6-8" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -572.82276,128.32287 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-5-9" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -542.39567,109.80206 2.64583,-1.32292 -2.64583,-1.32292 z"
d="m -539.74984,127.79181 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-5-9-3" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -539.74984,109.80206 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-3" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -620.45451,170.65622 2.64583,-1.32292 -2.64583,-1.32292 z"
d="m -615.1561,170.65622 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-56" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -524.81029,150.97298 0.93544,-2.80634 -2.80633,0.93545 z"
id="path1461-56-9" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -524.73004,187.93439 0.93544,-2.80634 -2.80633,0.93545 z"
id="path1461-56-9-7" /><circle
id="path1461-56-9" /><g
id="g2883"
transform="translate(10.583333)"><path
style="fill:none;stroke:#fccc04;stroke-width:2.64583;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -497.41649,156.10414 -3e-5,7.9375 c 0,0.51369 -0.0425,1.02579 -0.12247,1.53415 -0.53433,3.39492 -2.74347,6.62346 -5.16919,9.04918 l -2.64584,2.64584 -7.93747,7.93751 -2.64584,2.64584 c -2.78896,2.78894 -6.63916,5.29166 -10.58333,5.29166 l -19.24899,0"
id="path1232-7-2-7-61-5-2"
sodipodi:nodetypes="csssccssc" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -524.73004,187.93439 0.93544,-2.80634 -2.80633,0.93545 z"
id="path1461-56-9-7" /></g><circle
style="fill:#ffffff;stroke:#000000;stroke-width:2.64583;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;paint-order:stroke markers fill;stop-color:#000000"
id="path3447-9-1"
cx="-673.3645"
@ -3205,11 +3275,11 @@
d="m -735.54152,165.36455 2.64584,-1.32291 -2.64584,-1.32291 z"
id="path1461-29-94-9-1-3" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -486.83318,140.22913 2.64583,-1.32291 -2.64583,-1.32292 z"
d="m -481.54151,140.22913 2.64583,-1.32291 -2.64583,-1.32292 z"
id="path1461-29-3" /><circle
style="fill:#ffffff;stroke:#000000;stroke-width:2.64583;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;paint-order:stroke markers fill;stop-color:#000000"
id="path3447-9-1-2-1"
cx="-500.06235"
cx="-488.94986"
cy="138.90622"
r="2.6458333" /><g
id="g10990"
@ -3487,7 +3557,7 @@
id="path2821" /></g></g></g></g><text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:88.3725px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';fill:#050505;fill-opacity:1;stroke-width:0.92054"
x="-79.84938"
x="-62.132641"
y="299.66867"
id="text8068"
inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
@ -3496,7 +3566,7 @@
sodipodi:role="line"
id="tspan8066"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:88.3725px;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';fill:#050505;fill-opacity:1;stroke-width:0.92054"
x="-79.84938"
x="-62.132641"
y="299.66867">taxprofiler</tspan></text><text
xml:space="preserve"
style="font-size:87.792px;line-height:1.25;font-family:sans-serif;fill:#050505;fill-opacity:1;stroke-width:0.557997"
@ -3627,19 +3697,25 @@
rx="1.4964142"
ry="1.4176555" /></g></g><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m -546.09986,206.37496 h 7.93749 c 3.94417,0 7.79437,2.50271 10.58333,5.29166 l 2.64584,2.64584"
d="m -519.64151,206.37496 h 7.93749 c 3.94417,0 7.79437,2.50271 10.58333,5.29166 l 2.64584,2.64584"
id="path1232-7-5"
sodipodi:nodetypes="cssc" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -620.94623,207.69789 2.64583,-1.32292 -2.64583,-1.32292 z"
d="m -615.1561,207.69789 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-92" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -620.45451,194.46875 2.64583,-1.32292 -2.64583,-1.32292 z"
d="m -615.1561,194.46875 2.64583,-1.32292 -2.64583,-1.32292 z"
id="path1461-92-4" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -572.82276,207.79062 2.64583,-1.32292 -2.64583,-1.32291 z"
id="path1461-5-3-6" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -572.82276,194.46872 2.64583,-1.32292 -2.64583,-1.32291 z"
id="path1461-5-3-6-9" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -572.82277,170.61911 2.64583,-1.32292 -2.64583,-1.32291 z"
id="path1461-5-3-6-3" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -539.74984,207.69788 2.64583,-1.32291 -2.64583,-1.32292 z"
id="path1461-92-0" /><g
id="g2638"
@ -4222,6 +4298,16 @@
id="path3447-9-6"
cx="-556.94775"
cy="108.47913"
r="2.6458333" /><circle
style="fill:#ffffff;stroke:#000000;stroke-width:2.64583;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;paint-order:stroke markers fill;stop-color:#000000"
id="path3447-9-6-3"
cx="-519.90613"
cy="108.47913"
r="2.6458333" /><circle
style="fill:#ffffff;stroke:#000000;stroke-width:2.64583;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none;paint-order:stroke markers fill;stop-color:#000000"
id="path3447-9-6-3-6"
cx="-519.90613"
cy="144.19789"
r="2.6458333" /><g
id="g10653"
transform="translate(12.170834)"><path
@ -4404,4 +4490,13 @@
id="path1461-29-9-8-6-6-7-5" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -334.39053,210.81144 2.80632,0.93544 -0.93544,-2.80633 z"
id="path1461-29-9-8-6-6-7-69" /></g></g></svg>
id="path1461-29-9-8-6-6-7-69" /><path
style="fill:none;stroke:#ff6600;stroke-width:2.64583333;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-dasharray:none"
d="m -514.61444,121.7083 7.9375,7.9375"
id="path4190" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -516.0176,122.03433 2.80633,0.93544 -0.93543,-2.80634 z"
id="path1461-3-8" /><path
style="fill:#ffffff;stroke:none;stroke-width:0.529167;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
d="m -514.16551,165.03853 2.80633,0.93544 -0.93543,-2.80634 z"
id="path1461-3-8-1" /></g></g></svg>

Before

Width:  |  Height:  |  Size: 262 KiB

After

Width:  |  Height:  |  Size: 269 KiB

View file

@ -47,6 +47,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th
A final samplesheet file consisting of both single- and paired-end data, as well as long-read FASTA files may look something like the one below. This is for 6 samples, where `2612` has been sequenced twice.
```console
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
@ -73,14 +74,15 @@ The pipeline takes the locations and specific profiling parameters of the tool o
> ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files.
An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each.
An example database sheet can look as follows, where 5 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
```console
tool,db_name,db_params,db_path
malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
bracken,db1,,/<path>/<to>/bracken/testdb-bracken.tar.gz
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
krakenuniq,db3,,/<path>/<to>/krakenuniq/testdb-krakenuniq.tar.gz
centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
motus,db_mOTU,,/<path>/<to>/motus/motus_database/
@ -90,8 +92,8 @@ Column specifications are as follows:
| Column | Description |
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. |
| `db_name` | A unique name of the particular database [required]. |
| `tool` | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database. |
| `db_name` | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database. |
| `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the taxonomic profiling tool should use when profiling against this specific database. Can be empty to use taxonomic profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specifying particular file extensions - this should be already addressed via pipeline parameters. |
| `db_path` | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. |
@ -115,6 +117,21 @@ Expected (uncompressed) database files for each tool are as follows:
- `opts.k2d`
- `hash.k2d`
- `taxo.k2d`
- **Bracken** output of a combined `kraken2-` and `bracken-build` process. Please see the [documentation on Bracken](https://github.com/jenniferlu717/Bracken#running-bracken-easy-version) for details. The output is a directory containing files per expected sequencing read length similarly to:
- `hash.k2d`
- `opts.k2d`
- `taxo.k2d`
- `database.kraken`
- `database100mers.kmer_distrib`
- `database100mers.kraken`
- `database150mers.kmer_distrib`
- `database150mers.kraken`
- **KrakenUniq** output of `krakenuniq-build` command(s). A directory containing:
- `opts.k2d`
- `hash.k2d`
- `taxo.k2d`
- `database.idx`
- `taxDB`
- **Centrifuge** output of `centrifuge-build`. A directory containing:
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
@ -166,6 +183,10 @@ work # Directory containing the nextflow working files
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
### Sequencing quality control
nf-core/taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
### Preprocessing Steps
nf-core/taxprofiler offers four main preprocessing steps
@ -179,12 +200,12 @@ nf-core/taxprofiler offers four main preprocessing steps
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`)
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to include unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_includeunmerged`).
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
@ -230,9 +251,31 @@ You can optionally save the FASTQ output of the run merging with the `--save_run
#### Profiling
###### Bracken
It is unclear whether Bracken is suitable for running long reads, as it makes certain assumptions about read lengths. Furthermore, during testing we found issues where Bracken would fail on the long-read test data. Therefore nf-core/taxprofiler does not run Bracken on data specified as being sequenced with `OXFORD_NANOPORE` in the input samplesheet. If you believe this to be wrong, please contact us on the nf-core slack and we can discuss this.
###### Centrifuge
Centrifuge currently does not accept FASTA files as input, therefore no output will be produced for these input files.
###### DIAMOND
DIAMOND only allows output of a single format at a time, therefore supplying parameters such as `--diamond_save_reads` will result in only aligned reads in SAM format being produced; no taxonomic profiles will be available. Be aware of this when setting up your pipeline runs, depending on your particular use case.
###### MALT
nf-core/taxprofiler uses MALT 0.4.1, which is a comparatively old version. However it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should still be compatible with `malt-run` of 0.4.1.
MALT does not support paired-end read alignment (unlike other tools), therefore nf-core/taxprofiler aligns these as independent files if read-merging is skipped. If you skip merging, you can sum or average the results of the counts of the pairs.
Krona can only be run on MALT output if the path to a Krona taxonomy database is supplied to `--krona_taxonomy_directory`. Therefore, if you do not supply a Krona taxonomy directory, Krona plots will not be produced for MALT.
###### MetaPhlAn3
MetaPhlAn3 currently does not accept FASTA files as input, therefore no output will be produced for these input files.
###### mOTUs
mOTUs currently does not accept FASTA files as input, therefore no output will be produced for these input files.
### Updating the pipeline
@ -461,7 +504,7 @@ malt-build -i path/to/fasta/files/*.{fna,fa} -s DNA -d index -t 8 -st 4 -a2t meg
## Troubleshooting and FAQs
### I get a warning during centrifuge_kreport process with exit status 255.
### I get a warning during centrifuge_kreport process with exit status 255
When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.

View file

@ -12,9 +12,9 @@ class WorkflowMain {
// TODO nf-core: Add Zenodo DOI for pipeline after first release
//"* The pipeline\n" +
//" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" +
"* The nf-core framework\n" +
" https://doi.org/10.1038/s41587-020-0439-x\n\n" +
"* Software dependencies\n" +
'* The nf-core framework\n' +
' https://doi.org/10.1038/s41587-020-0439-x\n\n' +
'* Software dependencies\n' +
" https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
}
@ -53,15 +53,15 @@ class WorkflowMain {
System.exit(0)
}
// Print parameter summary log to screen
log.info paramsSummaryLog(workflow, params, log)
// Validate workflow parameters via the JSON schema
if (params.validate_params) {
NfcoreSchema.validateParameters(workflow, params, log)
}
// Print parameter summary log to screen
log.info paramsSummaryLog(workflow, params, log)
// Check that a -profile or Nextflow config has been provided to run the pipeline
NfcoreTemplate.checkConfigProvided(workflow, log)
@ -90,4 +90,5 @@ class WorkflowMain {
}
return null
}
}

View file

@ -7,7 +7,7 @@
"nf-core": {
"adapterremoval": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659"
},
"bbmap/bbduk": {
"branch": "master",
@ -21,6 +21,10 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"bracken/bracken": {
"branch": "master",
"git_sha": "8cab56516076b23c6f8eb1ac20ba4ce9692c85e1"
},
"cat/fastq": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
@ -49,9 +53,13 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"falco": {
"branch": "master",
"git_sha": "fc959214036403ad83efe7a41d43d0606c445cda"
},
"fastp": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e"
},
"fastqc": {
"branch": "master",
@ -89,6 +97,10 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"krakenuniq/preloadedkrakenuniq": {
"branch": "master",
"git_sha": "05649975c6611c6e007537a7984e186e12ae03af"
},
"krona/ktimporttaxonomy": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
@ -99,7 +111,7 @@
},
"malt/run": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "6d9712f03ec2de8264a50ee4541a617e1e063b51"
},
"megan/rma2info": {
"branch": "master",
@ -133,9 +145,9 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"porechop": {
"porechop/porechop": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "2a4e85eb81875a572bb58133e37f84ba3cc484d7"
},
"prinseqplusplus": {
"branch": "master",
@ -145,6 +157,14 @@
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"samtools/index": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"samtools/stats": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
},
"samtools/view": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"

View file

@ -0,0 +1,32 @@
// Writes a reduced copy of a report file that keeps only tab-separated
// fields 1-3 and 6-8 of every line.
//
// Input:  tuple of the sample meta map and the report file to trim.
// Output: `report`   - tuple of meta and the trimmed report file.
//         `versions` - versions.yml recording the coreutils `cut` version.
process KRAKEN2_STANDARD_REPORT {
tag "$meta.id"
label 'process_single'
// NOTE(review): the conda environment pins `sed`, while versions.yml below reports
// the version of `cut`; confirm both tools are available in the conda case.
conda (params.enable_conda ? 'conda-forge::sed=4.8' : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
'biocontainers/biocontainers:v1.2.0_cv2' }"
input:
tuple val(meta), path(report)
output:
// `result` is the file name computed in the script block below.
tuple val(meta), path(result), emit: report
path 'versions.yml' , emit: versions
when:
// Runs unless `task.ext.when` is explicitly set to a falsy value.
task.ext.when == null || task.ext.when
script:
def prefix = task.ext.prefix ?: "${meta.id}"
// Assigned without `def` on purpose: the output declaration above refers to this name.
result = "${prefix}_standardized.kraken2.report.txt"
"""
cut -f1-3,6-8 '${report}' > '${result}'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cut: \$(echo \$(cut --version 2>&1) | sed 's/^.*(GNU coreutils) //; s/ Copyright.*\$//')
END_VERSIONS
"""
}

View file

@ -34,7 +34,7 @@ process ADAPTERREMOVAL {
AdapterRemoval \\
--file1 $reads \\
$args \\
$adapterlist \\
$list \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--seed 42 \\
@ -61,7 +61,7 @@ process ADAPTERREMOVAL {
--file1 ${reads[0]} \\
--file2 ${reads[1]} \\
$args \\
$adapterlist \\
$list \\
--basename ${prefix} \\
--threads $task.cpus \\
--seed 42 \\

42
modules/nf-core/bracken/bracken/main.nf generated Normal file
View file

@ -0,0 +1,42 @@
// Runs `bracken` on a Kraken report against a given database and writes the
// result to `<prefix>.tsv`.
//
// Input:  tuple of the sample meta map and the Kraken report; the database path.
// Output: `reports`  - tuple of meta and the Bracken output file.
//         `versions` - versions.yml with the hard-coded Bracken version.
process BRACKEN_BRACKEN {
tag "$meta.id"
label 'process_low'
// WARN: Version information not provided by tool on CLI.
// Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::bracken=2.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0':
'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }"
input:
tuple val(meta), path(kraken_report)
path database
output:
// `bracken_report` is the file name computed in the script block below.
tuple val(meta), path(bracken_report), emit: reports
path "versions.yml" , emit: versions
when:
// Runs unless `task.ext.when` is explicitly set to a falsy value.
task.ext.when == null || task.ext.when
script:
// Extra command-line options for bracken, taken from `task.ext.args` (empty by default).
def args = task.ext.args ?: ""
def prefix = task.ext.prefix ?: "${meta.id}"
// Assigned without `def` on purpose: the output declaration above refers to this name.
bracken_report = "${prefix}.tsv"
// WARN: Version information not provided by tool on CLI.
// Please update version string below when bumping container versions.
def VERSION = '2.7'
"""
bracken \\
${args} \\
-d '${database}' \\
-i '${kraken_report}' \\
-o '${bracken_report}'
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bracken: ${VERSION}
END_VERSIONS
"""
}

48
modules/nf-core/bracken/bracken/meta.yml generated Normal file
View file

@ -0,0 +1,48 @@
# Metadata for the bracken_bracken module: tool references plus the
# documented input and output channels.
name: bracken_bracken
description: Re-estimate taxonomic abundance of metagenomic samples analyzed by kraken.
keywords:
- bracken
- metagenomics
- abundance
- kraken2
tools:
- bracken:
description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
homepage: https://ccb.jhu.edu/software/bracken/
documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual
tool_dev_url: https://github.com/jenniferlu717/Bracken
doi: "10.7717/peerj-cs.104"
licence: ["GPL v3"]
# Input channels, in the order they are documented here.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- kraken_report:
type: file
description: TSV file with six columns coming from kraken2 output
pattern: "*.{tsv}"
# NOTE(review): declared as `type: file`, but the description says this is a
# directory; confirm which type the metadata schema expects here.
- database:
type: file
description: Directory containing the kraken2/Bracken files for analysis
pattern: "*"
# Output channels.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reports:
type: file
description: TSV output report of the re-estimated abundances
pattern: "*.{tsv}"
authors:
- "@Midnighter"

57
modules/nf-core/falco/main.nf generated Normal file
View file

@ -0,0 +1,57 @@
// Runs falco (a FastQC-compatible read QC tool) on the reads of one sample.
//
// Inputs:
//   meta  - sample metadata map; `meta.id` is used for the task tag and the default prefix
//   reads - one or more FastQ files
// Outputs:
//   html     - every `*.html` file written by the task
//   txt      - every `*.txt` file written by the task
//   versions - versions.yml recording the falco version
process FALCO {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
        'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.html"), emit: html
    tuple val(meta), path("*.txt") , emit: txt
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    if ( reads.toList().size() == 1 ) {
        // A single input file: name the three report files explicitly after the prefix.
        """
        falco $args --threads $task.cpus ${reads} -D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    } else {
        // Several input files: no explicit output names are passed, falco picks them itself.
        """
        falco $args --threads $task.cpus ${reads}

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    }

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version string is extracted exactly as in the script branches so that
    // stub runs and real runs report the same value in versions.yml.
    """
    touch ${prefix}_data.txt
    touch ${prefix}_fastqc_data.html
    touch ${prefix}_summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """
}

52
modules/nf-core/falco/meta.yml generated Normal file
View file

@ -0,0 +1,52 @@
name: falco
description: Run falco on sequenced reads
keywords:
- quality control
- qc
- adapters
- fastq
tools:
- falco:
description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
homepage: "https://falco.readthedocs.io/"
documentation: "https://falco.readthedocs.io/"
tool_dev_url: "None"
doi: ""
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: FastQC like report
pattern: "*_{fastqc_report.html}"
- txt:
type: file
description: falco report data
pattern: "*_{data.txt}"
- txt:
type: file
description: falco summary file
pattern: "*_{summary.txt}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@lucacozzuto"

View file

@ -9,6 +9,7 @@ process FASTP {
input:
tuple val(meta), path(reads)
path adapter_fasta
val save_trimmed_fail
val save_merged
@ -27,6 +28,7 @@ process FASTP {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config.
@ -40,6 +42,7 @@ process FASTP {
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log \\
@ -61,6 +64,7 @@ process FASTP {
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log
@ -82,6 +86,7 @@ process FASTP {
--out2 ${prefix}_2.fastp.fastq.gz \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$merge_fastq \\
--thread $task.cpus \\

View file

@ -23,6 +23,10 @@ input:
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively. If you wish to run interleaved paired-end data, supply as single-end data
but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
- adapter_fasta:
type: file
description: File in FASTA format containing possible adapters to remove.
pattern: "*.{fasta,fna,fas,fa}"
- save_trimmed_fail:
type: boolean
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`

View file

@ -0,0 +1,224 @@
// Classifies reads with KrakenUniq in two steps within one task: a first
// `krakenuniq --preload` call using `--preload-size $ram_chunk_size`, followed by
// one classification call per input FastQ (single-end) or per FastQ pair (paired-end).
//
// Inputs:
//   meta               - metadata map; `meta.id` tags the task, `meta.single_end` selects the branch
//   fastqs             - input FastQ files (may hold several samples)
//   db                 - KrakenUniq database
//   ram_chunk_size     - value passed to `--preload-size`
//   save_output_fastqs - if true, adds `--classified-out`/`--unclassified-out` and gzips `*.fastq`
//   report_file        - if true, adds `--report-file`
//   save_output        - if true, adds `--output`
//
// Options:
//   task.ext.args  - appended to the preload call
//   task.ext.args2 - appended to every classification call; falls back to
//                    task.ext.args when unset, which was the previous behaviour
process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0':
        'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }"

    input:
    tuple val(meta), path(fastqs)
    path  db
    val ram_chunk_size
    val save_output_fastqs
    val report_file
    val save_output

    output:
    tuple val(meta), path('*.classified{.,_}*')     , optional:true, emit: classified_reads_fastq
    tuple val(meta), path('*.unclassified{.,_}*')   , optional:true, emit: unclassified_reads_fastq
    tuple val(meta), path('*classified.txt')        , optional:true, emit: classified_assignment
    tuple val(meta), path('*report.txt')            , emit: report
    path "versions.yml"                             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Explicit null check (not ?:) so that an intentionally empty ext.args2 is honoured.
    def args2 = task.ext.args2 != null ? task.ext.args2 : args

    // \${PREFIX} is escaped: it is a shell variable set per input inside the loop below.
    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    if (meta.single_end) {
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }

        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            PREFIX="\$(strip_suffix "\${FASTQ}")"

            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"
        done

        $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }

        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" |  sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"

            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"
        done

        $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }

    stub:
    def args = task.ext.args ?: ''
    // Same fallback as in the script section.
    def args2 = task.ext.args2 != null ? task.ext.args2 : args

    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    // The stub only echoes the commands and touches the expected output files.
    if (meta.single_end) {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }

        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            echo "\${FASTQ}"
            PREFIX="\$(strip_suffix "\${FASTQ}")"
            echo "\${PREFIX}"

            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"

            touch "\${PREFIX}.classified.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified.fastq.gz"
        done

        echo $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }

        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            echo "\${FASTQ[@]}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" |  sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
            echo "\${PREFIX}"

            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"

            touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz"
        done

        echo $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }
}

View file

@ -0,0 +1,78 @@
name: "krakenuniq_preloadedkrakenuniq"
description: Classifies metagenomic sequence data using unique k-mer counts
keywords:
- classify
- metagenomics
- kmers
- fastq
- db
tools:
- "krakenuniq":
description: "Metagenomics classifier with unique k-mer counting for more specific results"
homepage: https://github.com/fbreitwieser/krakenuniq
documentation: https://github.com/fbreitwieser/krakenuniq
doi: 10.1186/s13059-018-1568-0
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- fastqs:
type: file
description: List of input FastQ files
- db:
type: directory
description: KrakenUniq database
- ram_chunk_size:
type: val
description: Maximum amount of RAM that each chunk of the database loaded at any one time may occupy
pattern: "*GB"
- save_output_fastqs:
type: boolean
description: |
If true, optional commands are added to save classified and unclassified reads
as fastq files
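- report_file:
type: boolean
description: |
If true, an optional command is added to write the KrakenUniq report
file (`--report-file`)
- save_output:
type: boolean
description: |
If true, an optional command is added to save a file reporting the taxonomic
classification of each input read (`--output`)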
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- classified_reads_fastq:
type: file
description: |
Reads classified as belonging to any of the taxa
on the KrakenUniq database.
pattern: "*.fastq.gz"
- unclassified_reads_fastq:
type: file
description: |
Reads not classified to any of the taxa
on the KrakenUniq database.
pattern: "*.fastq.gz"
- classified_assignment:
type: file
description: |
KrakenUniq output file indicating the taxonomic assignment of
each input read
- report:
type: file
description: |
KrakenUniq report containing stats about classified
and not classified reads.
pattern: "*.report.txt"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@mjamy"
- "@Midnighter"

View file

@ -2,14 +2,13 @@ process MALT_RUN {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
conda (params.enable_conda ? "bioconda::malt=0.61" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
'quay.io/biocontainers/malt:0.41--1' }"
'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' :
'quay.io/biocontainers/malt:0.61--hdfd78af_0' }"
input:
tuple val(meta), path(fastqs)
val mode
path index
output:
@ -38,7 +37,6 @@ process MALT_RUN {
-o . \\
$args \\
--inFile ${fastqs.join(' ')} \\
-m $mode \\
--index $index/ |&tee ${prefix}-malt-run.log
cat <<-END_VERSIONS > versions.yml

View file

@ -28,10 +28,6 @@ input:
type: file
description: Input FASTQ files
pattern: "*.{fastq.gz,fq.gz}"
- mode:
type: string
description: Program mode
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
- index:
type: directory
description: Index/database directory from malt-build

View file

@ -1,4 +1,4 @@
process PORECHOP {
process PORECHOP_PORECHOP {
tag "$meta.id"
label 'process_medium'
@ -28,7 +28,6 @@ process PORECHOP {
$args \\
-o ${prefix}.fastq.gz \\
> ${prefix}.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
porechop: \$( porechop --version )

View file

@ -1,4 +1,4 @@
name: porechop
name: "porechop_porechop"
description: Adapter removal and demultiplexing of Oxford Nanopore reads
keywords:
- adapter

48
modules/nf-core/samtools/index/main.nf generated Normal file
View file

@ -0,0 +1,48 @@
// Runs `samtools index` on one alignment file per task.
// All three index outputs are declared optional, so the task succeeds with
// whichever of *.bai / *.csi / *.crai the command actually writes.
process SAMTOOLS_INDEX {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
// meta:  sample metadata map (meta.id is used as the task tag)
// input: the alignment file to index
input:
tuple val(meta), path(input)
output:
tuple val(meta), path("*.bai") , optional:true, emit: bai
tuple val(meta), path("*.csi") , optional:true, emit: csi
tuple val(meta), path("*.crai"), optional:true, emit: crai
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
// task.ext.args is forwarded verbatim to `samtools index`.
// NOTE(review): `-@` receives task.cpus-1, presumably because samtools counts
// threads in addition to the main one -- confirm against the samtools manual.
script:
def args = task.ext.args ?: ''
"""
samtools \\
index \\
-@ ${task.cpus-1} \\
$args \\
$input
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
// The stub creates an empty file for each of the three index types, named after the input.
stub:
"""
touch ${input}.bai
touch ${input}.crai
touch ${input}.csi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

53
modules/nf-core/samtools/index/meta.yml generated Normal file
View file

@ -0,0 +1,53 @@
name: samtools_index
description: Index SAM/BAM/CRAM file
keywords:
- index
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bai:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- crai:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- csi:
type: file
description: CSI index file
pattern: "*.{csi}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@ewels"
- "@maxulysse"

49
modules/nf-core/samtools/stats/main.nf generated Normal file
View file

@ -0,0 +1,49 @@
// Runs `samtools stats` on one alignment file and writes the result to
// `<prefix>.stats`.
//
// Inputs:
//   meta        - sample metadata map; `meta.id` tags the task and is the default prefix
//   input       - alignment file passed to `samtools stats`
//   input_index - index file staged alongside `input` (not referenced on the command line)
//   fasta       - optional reference; when set it is passed as `--reference`
// Outputs:
//   stats    - the `*.stats` file
//   versions - versions.yml recording the samtools version
process SAMTOOLS_STATS {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(input), path(input_index)
    path fasta

    output:
    tuple val(meta), path("*.stats"), emit: stats
    path  "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def reference = fasta ? "--reference ${fasta}" : ""
    // $args was previously read but never forwarded, so options configured via
    // task.ext.args were silently dropped; it expands to nothing when unset.
    """
    samtools \\
        stats \\
        --threads ${task.cpus} \\
        $args \\
        ${reference} \\
        ${input} \\
        > ${prefix}.stats

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.stats

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}

53
modules/nf-core/samtools/stats/meta.yml generated Normal file
View file

@ -0,0 +1,53 @@
name: samtools_stats
description: Produces comprehensive statistics from SAM/BAM/CRAM file
keywords:
- statistics
- counts
- bam
- sam
- cram
tools:
- samtools:
description: |
SAMtools is a set of utilities for interacting with and post-processing
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
These files are generated as output by short read aligners like BWA.
homepage: http://www.htslib.org/
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- stats:
type: file
description: File containing samtools stats output
pattern: "*.{stats}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@FriederikeHanssen"

View file

@ -60,13 +60,16 @@ params {
databases = null
// FASTQ preprocessing
preprocessing_qc_tool = 'fastqc'
perform_shortread_qc = false
shortread_qc_tool = 'fastp'
shortread_qc_skipadaptertrim = false
shortread_qc_mergepairs = true
shortread_qc_excludeunmerged = false
shortread_qc_mergepairs = false
shortread_qc_includeunmerged = false
shortread_qc_adapter1 = null
shortread_qc_adapter2 = null
shortread_qc_adapterlist = null
shortread_qc_minlength = 15
perform_longread_qc = false
@ -114,6 +117,16 @@ params {
run_kraken2 = false
kraken2_save_reads = false // added directly to module in profiling.nf
kraken2_save_readclassification = false // added directly to module in profiling.nf
kraken2_save_minimizers = false
//krakenuniq
run_krakenuniq = false
krakenuniq_ram_chunk_size = '16G'
krakenuniq_save_reads = false // added directly to module in profiling.nf
krakenuniq_save_readclassifications = false // added directly to module in profiling.nf
// Bracken
run_bracken = false
// centrifuge
run_centrifuge = false
@ -132,7 +145,10 @@ params {
diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated! added directly to module in profiling.nf
// mOTUs
run_motus = false
run_motus = false
motus_use_relative_abundance = false
motus_remove_ncbi_ids = false
motus_save_mgc_read_counts = false
// krona
run_krona = false
@ -229,6 +245,7 @@ profiles {
test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
test_nothing { includeConfig 'conf/test_nothing.config' }
test_motus { includeConfig 'conf/test_motus.config' }
test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' }
test_pep { includeConfig 'conf/test_pep.config' }
}

View file

@ -10,7 +10,7 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir", "databases"],
"required": ["input", "databases", "outdir"],
"properties": {
"input": {
"type": "string",
@ -56,6 +56,14 @@
"description": "Common options across both long and short read preprocessing QC steps",
"default": "",
"properties": {
"preprocessing_qc_tool": {
"type": "string",
"default": "fastqc",
"enum": ["fastqc", "falco"],
"help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however it is also applicable for short reads.",
"description": "Specify the tool used for quality control of raw sequencing reads",
"fa_icon": "fas fa-tools"
},
"save_preprocessed_reads": {
"type": "boolean",
"fa_icon": "fas fa-save",
@ -104,18 +112,23 @@
"description": "Specify adapter 2 nucleotide sequence",
"help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`"
},
"shortread_qc_adapterlist": {
"type": "string",
"default": "None",
"description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).",
"help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta"
},
"shortread_qc_mergepairs": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on merging of read pairs for paired-end data",
"default": true,
"help_text": "Turn on the merging of read-pairs of paired-end short read sequencing data for AdapterRemoval (this is performed automatically with fastp).\n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--collapse`\n"
"help_text": "Turn on the merging of read-pairs of paired-end short read sequencing data. \n\n> Modifies tool parameter(s):\n> - AdapterRemoval: `--collapse`\n> - fastp: `-m --merged_out`\n"
},
"shortread_qc_excludeunmerged": {
"shortread_qc_includeunmerged": {
"type": "boolean",
"fa_icon": "far fa-times-circle",
"description": "Discard unmerged reads from paired-end merging",
"help_text": "Turns off the inclusion of unmerged reads in resulting processing FASTQ file of paired-end sequencing data when using `fastp`.\n\nThis can be useful in cases where you prefer to have very short reads (e.g. aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\n\n> Modifies tool parameter(s):\n> - removed from reads `--include_unmerged`\n"
"description": "Include unmerged reads from paired-end merging in the downstream analysis",
"help_text": "Turns on the inclusion of unmerged reads in resulting FASTQ file from merging paired-end sequencing data when using `fastp` and/or `AdapterRemoval`. For `fastp` this means the unmerged read pairs are directly included in the output FASTQ file. For `AdapterRemoval`, additional output files containing unmerged reads are all concatenated into one file by the workflow.\n\nExcluding unmerged reads can be useful in cases where you prefer to have very short reads (e.g. aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\n\n> Adds `fastp` option: `--include_unmerged`\n"
},
"shortread_qc_minlength": {
"type": "integer",
@ -382,6 +395,41 @@
"description": "Turn on saving of Kraken2 per-read taxonomic assignment file",
"help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that each read received.\n\n> Modifies tool parameter(s):\n> - kraken2: `--output`"
},
"kraken2_save_minimizers": {
"type": "boolean",
"description": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.",
"fa_icon": "fas fa-save",
"help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command."
},
"run_krakenuniq": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on profiling with KrakenUniq. Requires the database to be present in the CSV file passed to --databases"
},
"krakenuniq_save_reads": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving of KrakenUniq-aligned reads",
"help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`"
},
"krakenuniq_ram_chunk_size": {
"type": "string",
"default": "16G",
"description": "Specify how large to chunk database when loading into memory for KrakenUniq",
"fa_icon": "fas fa-database",
"help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preload` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, and this is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload-size\n"
},
"krakenuniq_save_readclassifications": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving of KrakenUniq per-read taxonomic assignment file",
"help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that each read received.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`"
},
"run_bracken": {
"type": "boolean",
"description": "Post-process kraken2 reports with Bracken.",
"fa_icon": "fas fa-toggle-on"
},
"run_malt": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
@ -415,6 +463,18 @@
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on profiling with mOTUs. Requires database to be present CSV file passed to --databases"
},
"motus_use_relative_abundance": {
"type": "boolean",
"description": "Turn on printing relative abundance instead of counts."
},
"motus_save_mgc_read_counts": {
"type": "boolean",
"description": "Turn on saving the mgc reads count."
},
"motus_remove_ncbi_ids": {
"type": "boolean",
"description": "Turn on removing NCBI taxonomic IDs."
}
},
"fa_icon": "fas fa-align-center"

View file

@ -6,6 +6,8 @@ include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/inde
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
workflow LONGREAD_HOSTREMOVAL {
take:
@ -39,9 +41,21 @@ workflow LONGREAD_HOSTREMOVAL {
SAMTOOLS_BAM2FQ ( SAMTOOLS_VIEW.out.bam, false )
ch_versions = ch_versions.mix( SAMTOOLS_BAM2FQ.out.versions.first() )
SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
bam_bai = MINIMAP2_ALIGN.out.bam
.join(SAMTOOLS_INDEX.out.bai, remainder: true)
SAMTOOLS_STATS ( bam_bai, reference )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats )
emit:
stats = SAMTOOLS_STATS.out.stats //channel: [val(meta), [reads ] ]
reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -3,7 +3,9 @@
//
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/porechop/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main'
include { FILTLONG } from '../../modules/nf-core/filtlong/main'
workflow LONGREAD_PREPROCESSING {
@ -15,9 +17,9 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = Channel.empty()
if ( !params.longread_qc_skipadaptertrim && params.longread_qc_skipqualityfilter) {
PORECHOP ( reads )
PORECHOP_PORECHOP ( reads )
ch_processed_reads = PORECHOP.out.reads
ch_processed_reads = PORECHOP_PORECHOP.out.reads
.map {
meta, reads ->
def meta_new = meta.clone()
@ -25,8 +27,8 @@ workflow LONGREAD_PREPROCESSING {
[ meta_new, reads ]
}
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log )
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
} else if ( params.longread_qc_skipadaptertrim && !params.longread_qc_skipqualityfilter) {
@ -35,8 +37,8 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
} else {
PORECHOP ( reads )
ch_clipped_reads = PORECHOP.out.reads
PORECHOP_PORECHOP ( reads )
ch_clipped_reads = PORECHOP_PORECHOP.out.reads
.map {
meta, reads ->
def meta_new = meta.clone()
@ -46,14 +48,22 @@ workflow LONGREAD_PREPROCESSING {
ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP.out.log )
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
if (params.preprocessing_qc_tool == 'fastqc') {
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -5,12 +5,15 @@
include { MALT_RUN } from '../../modules/nf-core/malt/run/main'
include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main'
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main'
include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report'
include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main'
include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main'
include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
workflow PROFILING {
take:
@ -39,12 +42,13 @@ workflow PROFILING {
.combine(databases)
.branch {
malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2'
kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken
metaphlan3: it[2]['tool'] == 'metaphlan3'
centrifuge: it[2]['tool'] == 'centrifuge'
kaiju: it[2]['tool'] == 'kaiju'
diamond: it[2]['tool'] == 'diamond'
motus: it[2]['tool'] == 'motus'
krakenuniq: it[2]['tool'] == 'krakenuniq'
unknown: true
}
@ -93,7 +97,7 @@ workflow PROFILING {
db: it[2]
}
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db )
ch_maltrun_for_megan = MALT_RUN.out.rma6
.transpose()
@ -129,7 +133,46 @@ workflow PROFILING {
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
ch_raw_profiles = ch_raw_profiles.mix(
KRAKEN2_KRAKEN2.out.report
// Set the tool to be strictly 'kraken2' instead of potentially 'bracken' for downstream use.
// Will remain distinct from 'pure' Kraken2 results due to distinct database names in file names.
.map { meta, report -> [meta + [tool: 'kraken2'], report]}
)
}
if ( params.run_kraken2 && params.run_bracken ) {
// Remove files from 'pure' kraken2 runs, so only those aligned against Bracken & kraken2 database are used.
def ch_kraken2_output = KRAKEN2_KRAKEN2.out.report
.filter {
meta, report ->
if ( meta['instrument_platform'] == 'OXFORD_NANOPORE' ) log.warn "[nf-core/taxprofiler] Bracken has not been evaluated for Nanopore data. Skipping Bracken for sample ${meta.id}."
meta['tool'] == 'bracken' && meta['instrument_platform'] != 'OXFORD_NANOPORE'
}
// If necessary, convert the eight column output to six column output.
if (params.kraken2_save_minimizers) {
ch_kraken2_output = KRAKEN2_STANDARD_REPORT(ch_kraken2_output).report
}
// Extract the database name to combine by.
ch_bracken_databases = databases
.filter { meta, db -> meta['tool'] == 'bracken' }
.map { meta, db -> [meta['db_name'], meta, db] }
// Key the Kraken2 reports by database name and pair each one with its matching Bracken database.
ch_input_for_bracken = ch_kraken2_output
.map { meta, report -> [meta['db_name'], meta, report] }
.combine(ch_bracken_databases, by: 0)
.multiMap { key, meta, report, db_meta, db ->
report: [meta + db_meta, report]
db: db
}
BRACKEN_BRACKEN(ch_input_for_bracken.report, ch_input_for_bracken.db)
ch_versions = ch_versions.mix(BRACKEN_BRACKEN.out.versions.first())
ch_raw_profiles = ch_raw_profiles.mix(BRACKEN_BRACKEN.out.reports)
}
@ -228,6 +271,28 @@ workflow PROFILING {
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
}
if ( params.run_krakenuniq ) {
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
.map {
meta, reads, db_meta, db ->
[[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db]
}
.groupTuple(by: [0,2,3])
.dump(tag: "krakenuniq_premultimap")
.multiMap {
single_meta, reads, db_meta, db ->
reads: [ single_meta + db_meta, reads.flatten() ]
db: db
}
// Hardcode to _always_ produce the report file (which is our basic output, and goes into both the MultiQC files and the raw profiles channels)
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )
ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
}
emit:
classifications = ch_raw_classifications
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom

View file

@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
take:
reads // [[meta], [reads]]
adapterlist // file
main:
ch_versions = Channel.empty()
@ -21,15 +22,15 @@ workflow SHORTREAD_ADAPTERREMOVAL {
paired: !it[0].single_end
}
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist )
/*
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file
* has to be exported in a separate channel and we must manually recombine when necessary.
*/
if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {
if ( params.shortread_qc_mergepairs && params.shortread_qc_includeunmerged ) {
ch_concat_fastq = Channel.empty()
.mix(
@ -39,9 +40,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ADAPTERREMOVAL_PAIRED.out.paired_truncated
)
.map { meta, reads ->
def meta_new = meta.clone()
meta_new.single_end = true
[meta_new, reads]
[meta + [single_end: true], reads]
}
.groupTuple()
// Paired-end reads cause a nested tuple during grouping.
@ -54,7 +53,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
} else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {
} else if ( params.shortread_qc_mergepairs && !params.shortread_qc_includeunmerged ) {
ch_concat_fastq = Channel.empty()
.mix(
@ -62,9 +61,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
)
.map { meta, reads ->
def meta_new = meta.clone()
meta_new.single_end = true
[meta_new, reads]
[meta + [single_end: true], reads]
}
.groupTuple()
.map { meta, fastq -> [meta, fastq.flatten()] }

View file

@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main'
workflow SHORTREAD_FASTP {
take:
reads // [[meta], [reads]]
adapterlist
main:
ch_versions = Channel.empty()
@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP {
paired: it[0]['single_end'] == false
}
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false )
// Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs )
if ( params.shortread_qc_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged

View file

@ -4,6 +4,9 @@
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
workflow SHORTREAD_HOSTREMOVAL {
take:
@ -22,11 +25,31 @@ workflow SHORTREAD_HOSTREMOVAL {
ch_bowtie2_index = index.first()
}
BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true, false )
BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true, true)
ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log )
ch_bowtie2_mapped = BOWTIE2_ALIGN.out.bam
.map {
meta, reads ->
[ meta, reads, [] ]
}
SAMTOOLS_VIEW ( ch_bowtie2_mapped, [], [] )
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_INDEX ( SAMTOOLS_VIEW.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
bam_bai = BOWTIE2_ALIGN.out.bam
.join(SAMTOOLS_INDEX.out.bai, remainder: true)
SAMTOOLS_STATS ( bam_bai, reference )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_STATS.out.stats )
emit:
stats = SAMTOOLS_STATS.out.stats
reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files

View file

@ -5,31 +5,39 @@
include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
workflow SHORTREAD_PREPROCESSING {
take:
reads // [ [ meta ], [ reads ] ]
adapterlist // file
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( params.shortread_qc_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else {
ch_processed_reads = reads
}
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
if (params.preprocessing_qc_tool == 'fastqc') {
FASTQC_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
emit:
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]

View file

@ -3,7 +3,7 @@
//
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main'
include { KRAKENTOOLS_COMBINEKREPORTS } from '../../modules/nf-core/krakentools/combinekreports/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main'
@ -93,10 +93,10 @@ workflow STANDARDISATION_PROFILES {
[[id:it[0]], it[1]]
}
KRAKENTOOLS_COMBINEKREPORTS ( ch_profiles_for_kraken2 )
ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS.out.txt )
ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS.out.versions )
KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 )
ch_standardised_tables = ch_standardised_tables.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )
ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions )
// MetaPhlAn3

View file

@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log)
// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
params.shortread_hostremoval_index, params.multiqc_config
params.shortread_hostremoval_index, params.multiqc_config,
params.shortread_qc_adapterlist
]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@ -27,7 +28,7 @@ if ( params.input ) {
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
if (params.shortread_qc_includeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging is not turned on. Please specify --shortread_qc_mergepairs"
if (params.shortread_complexityfilter_tool == 'fastp' && ( params.perform_shortread_qc == false || params.shortread_qc_tool != 'fastp' )) exit 1, "ERROR: [nf-core/taxprofiler] cannot use fastp complexity filtering if preprocessing not turned on and/or tool is not fastp. Please specify --perform_shortread_qc and/or --shortread_qc_tool 'fastp'"
@ -41,6 +42,7 @@ if (params.longread_hostremoval_index ) { ch_longread_reference_index = fi
if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
if (params.run_malt && params.run_krona && !params.krona_taxonomy_directory) log.warn "[nf-core/taxprofiler] Krona can only be run on MALT output if path to Krona taxonomy database supplied to --krona_taxonomy_directory. Krona will not be executed in this run for MALT."
if (params.run_bracken && !params.run_kraken2) exit 1, 'ERROR: [nf-core/taxprofiler] You are attempting to run Bracken without running kraken2. This is not possible! Please set --run_kraken2 as well.'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -84,6 +86,7 @@ include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardis
// MODULE: Installed directly from nf-core/modules
//
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { FALCO } from '../modules/nf-core/falco/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
@ -101,6 +104,12 @@ workflow TAXPROFILER {
ch_versions = Channel.empty()
ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")
adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : []
if ( params.shortread_qc_adapterlist ) {
if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
}
/*
SUBWORKFLOW: Read in samplesheet, validate and stage input files
@ -120,17 +129,19 @@ workflow TAXPROFILER {
*/
ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
FASTQC (
ch_input_for_fastqc
)
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
if ( params.preprocessing_qc_tool == 'falco' ) {
FALCO ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FALCO.out.versions.first())
} else {
FASTQC ( ch_input_for_fastqc )
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
}
/*
SUBWORKFLOW: PERFORM PREPROCESSING
*/
if ( params.perform_shortread_qc ) {
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
} else {
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
@ -254,7 +265,13 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
if ( params.preprocessing_qc_tool == 'falco' ) {
ch_multiqc_files = ch_multiqc_files.mix(FALCO.out.txt.collect{it[1]}.ifEmpty([]))
} else {
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
}
if (params.perform_shortread_qc) {
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
@ -272,6 +289,10 @@ workflow TAXPROFILER {
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
}
if (params.perform_longread_hostremoval) {
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
}
ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) )
if ( params.run_profile_standardisation ) {