1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 13:29:56 +00:00

Merge branch 'dev' into update_motus

This commit is contained in:
sofstam 2022-12-01 18:49:23 +01:00
commit 6d140c683a
20 changed files with 518 additions and 22 deletions

View file

@ -101,3 +101,36 @@ jobs:
with: with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
attempt_limit: 3 attempt_limit: 3
# CI job dedicated to the KrakenUniq test profile (test_krakenuniq).
# The `if` guard lets the job run for every non-push event, and for push
# events only when the repository is nf-core/taxprofiler.
krakenuniq:
name: Test KrakenUniq with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Nextflow versions the job is repeated for.
NXF_VER:
- "21.10.3"
- "latest-everything"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"
- name: Show current locale
run: locale
- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
# Run via the retry action; attempt_limit below is presumably the maximum
# number of tries before the step is reported as failed.
uses: Wandalen/wretry.action@v1.0.11
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results
attempt_limit: 3

View file

@ -36,6 +36,10 @@
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0)
> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
- [Bracken](https://doi.org/10.7717/peerj-cs.104) - [Bracken](https://doi.org/10.7717/peerj-cs.104)
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104 > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104

View file

@ -281,7 +281,7 @@ process {
} }
withName: MALT_RUN { withName: MALT_RUN {
ext.args = { "${meta.db_params}" } ext.args = { "${meta.db_params} -m ${params.malt_mode}" }
// one run with multiple samples, so fix ID to just db name to ensure clean log name // one run with multiple samples, so fix ID to just db name to ensure clean log name
ext.prefix = { "${meta.db_name}" } ext.prefix = { "${meta.db_name}" }
publishDir = [ publishDir = [
@ -330,6 +330,17 @@ process {
] ]
} }
// Settings applied to the KrakenUniq classification process.
withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
// Extra command-line options are taken from meta.db_params.
ext.args = { "${meta.db_params}" }
// one run with multiple samples, so fix ID to just db name to ensure clean log name
// NOTE(review): the KRAKENUNIQ_PRELOADEDKRAKENUNIQ script in this change set never reads
// task.ext.prefix (it derives PREFIX from the FASTQ file names) - confirm this has an effect.
ext.prefix = { "${meta.db_name}" }
// Publish results into a per-database sub-directory of the KrakenUniq output folder.
publishDir = [
path: { "${params.outdir}/krakenuniq/${meta.db_name}/" },
mode: params.publish_dir_mode,
pattern: '*.{txt,report,fastq.gz}'
]
}
withName: KRONA_CLEANUP { withName: KRONA_CLEANUP {
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
publishDir = [ publishDir = [

View file

@ -39,6 +39,7 @@ params {
run_metaphlan3 = true run_metaphlan3 = true
run_centrifuge = true run_centrifuge = true
run_diamond = true run_diamond = true
run_krakenuniq = true
run_motus = false run_motus = false
run_krona = true run_krona = true
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab' krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
@ -51,6 +52,7 @@ params {
process { process {
withName: MALT_RUN { withName: MALT_RUN {
maxForks = 1 maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
} }
withName: MEGAN_RMA2INFO_TSV { withName: MEGAN_RMA2INFO_TSV {
maxForks = 1 maxForks = 1

View file

@ -0,0 +1,72 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test
    that only exercises KrakenUniq.

    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

//
// Separate test as KrakenUniq database can sometimes be too big for GHA
//

params {
    config_profile_name        = 'KrakenUniq Test profile'
    config_profile_description = 'Minimal test to check KrakenUniq function'

    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Input data
    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
    input                              = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                          = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv'

    // Preprocessing steps stay switched on so KrakenUniq receives preprocessed reads
    perform_shortread_qc               = true
    perform_longread_qc                = true
    shortread_qc_mergepairs            = true
    perform_shortread_complexityfilter = true
    perform_shortread_hostremoval      = true
    perform_longread_hostremoval       = true
    perform_runmerging                 = true
    hostremoval_reference              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'

    // Only KrakenUniq (plus Krona) is enabled; every other profiler is disabled
    run_kaiju                          = false
    run_kraken2                        = false
    run_bracken                        = false
    run_malt                           = false
    run_metaphlan3                     = false
    run_centrifuge                     = false
    run_diamond                        = false
    run_krakenuniq                     = true
    run_motus                          = false
    run_krona                          = true
    krona_taxonomy_directory           = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
    malt_save_reads                    = true
    kraken2_save_reads                 = true
    centrifuge_save_reads              = true
    diamond_save_reads                 = true
}

process {
    withName: MALT_RUN {
        maxForks = 1
    }
    withName: MEGAN_RMA2INFO_TSV {
        maxForks = 1
    }
    withName: MEGAN_RMA2INFO_KRONA {
        maxForks = 1
    }
    withName: 'EIDO_VALIDATE' {
        ext.args = '--st-index sample'
    }
    withName: 'EIDO_CONVERT' {
        ext.args = '--st-index sample'
    }
}

View file

@ -10,6 +10,10 @@
---------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------
*/ */
//
// Separate test as mOTUs database download can be flaky
//
params { params {
config_profile_name = 'mOTUs Test profile' config_profile_name = 'mOTUs Test profile'
config_profile_description = 'Minimal test to check mOTUs function' config_profile_description = 'Minimal test to check mOTUs function'
@ -38,6 +42,7 @@ params {
run_metaphlan3 = false run_metaphlan3 = false
run_centrifuge = false run_centrifuge = false
run_diamond = false run_diamond = false
run_krakenuniq = false
run_motus = true run_motus = true
motus_save_mgc_read_counts = true motus_save_mgc_read_counts = true
motus_remove_ncbi_ids = true motus_remove_ncbi_ids = true

View file

@ -38,6 +38,7 @@ params {
run_metaphlan3 = true run_metaphlan3 = true
run_centrifuge = true run_centrifuge = true
run_diamond = true run_diamond = true
run_krakenuniq = true
run_motus = false run_motus = false
run_krona = true run_krona = true
} }
@ -45,5 +46,6 @@ params {
process { process {
withName: MALT_RUN { withName: MALT_RUN {
maxForks = 1 maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
} }
} }

View file

@ -39,6 +39,7 @@ params {
run_metaphlan3 = false run_metaphlan3 = false
run_centrifuge = false run_centrifuge = false
run_diamond = false run_diamond = false
run_krakenuniq = false
run_motus = false run_motus = false
} }

View file

@ -38,11 +38,13 @@ params {
run_metaphlan3 = false run_metaphlan3 = false
run_centrifuge = false run_centrifuge = false
run_diamond = false run_diamond = false
run_krakenuniq = false
run_motus = false run_motus = false
} }
process { process {
withName: MALT_RUN { withName: MALT_RUN {
maxForks = 1 maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
} }
} }

View file

@ -37,6 +37,7 @@ params {
process { process {
withName: MALT_RUN { withName: MALT_RUN {
maxForks = 1 maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
} }
withName: MEGAN_RMA2INFO { withName: MEGAN_RMA2INFO {
maxForks = 1 maxForks = 1

View file

@ -74,7 +74,7 @@ The pipeline takes the locations and specific profiling parameters of the tool o
> ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files. > ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files.
An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database. An example database sheet can look as follows, where 5 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
```console ```console
tool,db_name,db_params,db_path tool,db_name,db_params,db_path
@ -82,6 +82,7 @@ malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
bracken,db1,,/<path>/<to>/bracken/testdb-bracken.tar.gz bracken,db1,,/<path>/<to>/bracken/testdb-bracken.tar.gz
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
krakenuniq,db3,,/<path>/<to>/krakenuniq/testdb-krakenuniq.tar.gz
centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/ metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
motus,db_mOTU,,/<path>/<to>/motus/motus_database/ motus,db_mOTU,,/<path>/<to>/motus/motus_database/
@ -125,6 +126,12 @@ Expected (uncompressed) database files for each tool are as follows:
- `database100mers.kraken` - `database100mers.kraken`
- `database150mers.kmer_distrib` - `database150mers.kmer_distrib`
- `database150mers.kraken` - `database150mers.kraken`
- **KrakenUniq** output of `krakenuniq-build` command(s). A directory containing:
- `opts.k2d`
- `hash.k2d`
- `taxo.k2d`
- `database.idx`
- `taxDB`
- **Centrifuge** output of `centrifuge-build`. A directory containing: - **Centrifuge** output of `centrifuge-build`. A directory containing:
- `<database_name>.<number>.cf` - `<database_name>.<number>.cf`
- `<database_name>.<number>.cf` - `<database_name>.<number>.cf`
@ -177,7 +184,7 @@ work # Directory containing the nextflow working files
### Sequencing quality control ### Sequencing quality control
nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco] as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/). nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
### Preprocessing Steps ### Preprocessing Steps
@ -442,7 +449,7 @@ NXF_OPTS='-Xms1g -Xmx4g'
## Troubleshooting and FAQs ## Troubleshooting and FAQs
### I get a warning during centrifuge_kreport process with exit status 255. ### I get a warning during centrifuge_kreport process with exit status 255
When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty. When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.

View file

@ -12,9 +12,9 @@ class WorkflowMain {
// TODO nf-core: Add Zenodo DOI for pipeline after first release // TODO nf-core: Add Zenodo DOI for pipeline after first release
//"* The pipeline\n" + //"* The pipeline\n" +
//" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" +
"* The nf-core framework\n" + '* The nf-core framework\n' +
" https://doi.org/10.1038/s41587-020-0439-x\n\n" + ' https://doi.org/10.1038/s41587-020-0439-x\n\n' +
"* Software dependencies\n" + '* Software dependencies\n' +
" https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
} }
@ -53,15 +53,15 @@ class WorkflowMain {
System.exit(0) System.exit(0)
} }
// Print parameter summary log to screen
log.info paramsSummaryLog(workflow, params, log)
// Validate workflow parameters via the JSON schema // Validate workflow parameters via the JSON schema
if (params.validate_params) { if (params.validate_params) {
NfcoreSchema.validateParameters(workflow, params, log) NfcoreSchema.validateParameters(workflow, params, log)
} }
// Print parameter summary log to screen
log.info paramsSummaryLog(workflow, params, log)
// Check that a -profile or Nextflow config has been provided to run the pipeline // Check that a -profile or Nextflow config has been provided to run the pipeline
NfcoreTemplate.checkConfigProvided(workflow, log) NfcoreTemplate.checkConfigProvided(workflow, log)
@ -90,4 +90,5 @@ class WorkflowMain {
} }
return null return null
} }
} }

View file

@ -97,6 +97,10 @@
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
}, },
"krakenuniq/preloadedkrakenuniq": {
"branch": "master",
"git_sha": "05649975c6611c6e007537a7984e186e12ae03af"
},
"krona/ktimporttaxonomy": { "krona/ktimporttaxonomy": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
@ -107,7 +111,7 @@
}, },
"malt/run": { "malt/run": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "6d9712f03ec2de8264a50ee4541a617e1e063b51"
}, },
"megan/rma2info": { "megan/rma2info": {
"branch": "master", "branch": "master",

View file

@ -0,0 +1,224 @@
// Classifies one or more FASTQ inputs against a single KrakenUniq database.
//
// The script first invokes krakenuniq with --preload/--preload-size on the database and
// then loops over the staged FASTQ files (single-end) or FASTQ pairs (paired-end),
// running one classification call per input. Output file names are derived from the
// FASTQ file names (the PREFIX shell variable), not from task.ext.prefix.
//
// Inputs:
//   meta               - sample information map; only meta.id and meta.single_end are read here
//   fastqs             - FASTQ files to classify
//   db                 - KrakenUniq database
//   ram_chunk_size     - value handed to --preload-size
//   save_output_fastqs - when true, write (and gzip) classified/unclassified reads
//   report_file        - when true, write the report via --report-file; the `report`
//                        output is not optional, so this is expected to be true
//   save_output        - when true, write the per-read assignment file via --output
//
// Configuration:
//   task.ext.args  - extra options for the preload call
//   task.ext.args2 - extra options for the classification calls; when unset the value
//                    of task.ext.args is reused, so existing configs keep working
process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0':
        'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }"

    input:
    tuple val(meta), path(fastqs)
    path  db
    val   ram_chunk_size
    val   save_output_fastqs
    val   report_file
    val   save_output

    output:
    tuple val(meta), path('*.classified{.,_}*')     , optional:true, emit: classified_reads_fastq
    tuple val(meta), path('*.unclassified{.,_}*')   , optional:true, emit: unclassified_reads_fastq
    tuple val(meta), path('*classified.txt')        , optional:true, emit: classified_assignment
    tuple val(meta), path('*report.txt')            , emit: report
    path "versions.yml"                             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Dedicated options for the classification calls; fall back to `args` when not configured.
    def args2 = task.ext.args2 != null ? task.ext.args2 : args

    // The '#' in the paired-end file names is a placeholder that ends up as the mate
    // number in the written files (see the *_1/*_2 names created by the stub).
    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    // Read files are written uncompressed, so gzip them afterwards when they are kept.
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    if (meta.single_end) {
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }

        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            PREFIX="\$(strip_suffix "\${FASTQ}")"

            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"
        done

        $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }

        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"

            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"
        done

        $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }

    stub:
    def args = task.ext.args ?: ''
    // Same fallback as in the script section.
    def args2 = task.ext.args2 != null ? task.ext.args2 : args

    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    // The stub only echoes the commands and touches the expected output files.
    if (meta.single_end) {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }

        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            echo "\${FASTQ}"
            PREFIX="\$(strip_suffix "\${FASTQ}")"
            echo "\${PREFIX}"

            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"

            touch "\${PREFIX}.classified.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified.fastq.gz"
        done

        echo $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args

        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }

        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            echo "\${FASTQ[@]}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
            echo "\${PREFIX}"

            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"

            touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz"
        done

        echo $compress_reads_command

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }
}

View file

@ -0,0 +1,78 @@
# Module metadata for KRAKENUNIQ_PRELOADEDKRAKENUNIQ.
# The input entries follow the order of the process inputs in main.nf.
name: "krakenuniq_preloadedkrakenuniq"
description: Classifies metagenomic sequence data using unique k-mer counts
keywords:
  - classify
  - metagenomics
  - kmers
  - fastq
  - db
tools:
  - "krakenuniq":
      description: "Metagenomics classifier with unique k-mer counting for more specific results"
      homepage: https://github.com/fbreitwieser/krakenuniq
      documentation: https://github.com/fbreitwieser/krakenuniq
      doi: 10.1186/s13059-018-1568-0
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastqs:
      type: file
      description: List of input FastQ files
  - db:
      type: directory
      description: KrakenUniq database
  - ram_chunk_size:
      type: val
      description: |
        Maximum amount of RAM that each chunk of the database may occupy
        when it is loaded into memory (passed to `--preload-size`)
      pattern: "*GB"
  - save_output_fastqs:
      type: boolean
      description: |
        If true, optional commands are added to save classified and unclassified reads
        as fastq files
  - report_file:
      type: boolean
      description: |
        If true, `--report-file` is added to the command so that a KrakenUniq report
        is written. The `report` output of the module is not optional, so this is
        expected to be true.
  - save_output:
      type: boolean
      description: |
        If true, an optional command is added to save a file reporting the taxonomic
        classification of each input read
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - classified_reads_fastq:
      type: file
      description: |
        Reads classified as belonging to any of the taxa
        on the KrakenUniq database.
      pattern: "*.fastq.gz"
  - unclassified_reads_fastq:
      type: file
      description: |
        Reads not classified to any of the taxa
        on the KrakenUniq database.
      pattern: "*.fastq.gz"
  - classified_assignment:
      type: file
      description: |
        KrakenUniq output file indicating the taxonomic assignment of
        each input read
      pattern: "*.classified.txt"
  - report:
      type: file
      description: |
        KrakenUniq report containing stats about classified
        and unclassified reads.
      pattern: "*.report.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@mjamy"
  - "@Midnighter"

View file

@ -2,14 +2,13 @@ process MALT_RUN {
tag "$meta.id" tag "$meta.id"
label 'process_high' label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.41" : null) conda (params.enable_conda ? "bioconda::malt=0.61" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.41--1' : 'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' :
'quay.io/biocontainers/malt:0.41--1' }" 'quay.io/biocontainers/malt:0.61--hdfd78af_0' }"
input: input:
tuple val(meta), path(fastqs) tuple val(meta), path(fastqs)
val mode
path index path index
output: output:
@ -38,7 +37,6 @@ process MALT_RUN {
-o . \\ -o . \\
$args \\ $args \\
--inFile ${fastqs.join(' ')} \\ --inFile ${fastqs.join(' ')} \\
-m $mode \\
--index $index/ |&tee ${prefix}-malt-run.log --index $index/ |&tee ${prefix}-malt-run.log
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -28,10 +28,6 @@ input:
type: file type: file
description: Input FASTQ files description: Input FASTQ files
pattern: "*.{fastq.gz,fq.gz}" pattern: "*.{fastq.gz,fq.gz}"
- mode:
type: string
description: Program mode
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
- index: - index:
type: directory type: directory
description: Index/database directory from malt-build description: Index/database directory from malt-build

View file

@ -119,6 +119,12 @@ params {
kraken2_save_readclassification = false // added directly to module in profiling.nf kraken2_save_readclassification = false // added directly to module in profiling.nf
kraken2_save_minimizers = false kraken2_save_minimizers = false
//krakenuniq
run_krakenuniq = false
krakenuniq_ram_chunk_size = '16G'
krakenuniq_save_reads = false // added directly to module in profiling.nf
krakenuniq_save_readclassifications = false // added directly to module in profiling.nf
// Bracken // Bracken
run_bracken = false run_bracken = false
@ -239,6 +245,7 @@ profiles {
test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' } test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
test_nothing { includeConfig 'conf/test_nothing.config' } test_nothing { includeConfig 'conf/test_nothing.config' }
test_motus { includeConfig 'conf/test_motus.config' } test_motus { includeConfig 'conf/test_motus.config' }
test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' }
test_pep { includeConfig 'conf/test_pep.config' } test_pep { includeConfig 'conf/test_pep.config' }
} }

View file

@ -402,6 +402,30 @@
"fa_icon": "fas fa-save", "fa_icon": "fas fa-save",
"help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command." "help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command."
}, },
"run_krakenuniq": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",
"description": "Turn on profiling with KrakenUniq. Requires database to be present in the CSV file passed to --databases"
},
"krakenuniq_save_reads": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving of KrakenUniq-aligned reads",
"help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`"
},
"krakenuniq_ram_chunk_size": {
"type": "string",
"default": "16G",
"description": "Specify how large to chunk database when loading into memory for KrakenUniq",
"fa_icon": "fas fa-database",
"help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preload` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, which is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: `--preload-size`"
},
"krakenuniq_save_readclassifications": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving of KrakenUniq per-read taxonomic assignment file",
"help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that the read received.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`"
},
"run_bracken": { "run_bracken": {
"type": "boolean", "type": "boolean",
"description": "Post-process kraken2 reports with Bracken.", "description": "Post-process kraken2 reports with Bracken.",

View file

@ -13,6 +13,7 @@ include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/me
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main' include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main' include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
workflow PROFILING { workflow PROFILING {
take: take:
@ -47,6 +48,7 @@ workflow PROFILING {
kaiju: it[2]['tool'] == 'kaiju' kaiju: it[2]['tool'] == 'kaiju'
diamond: it[2]['tool'] == 'diamond' diamond: it[2]['tool'] == 'diamond'
motus: it[2]['tool'] == 'motus' motus: it[2]['tool'] == 'motus'
krakenuniq: it[2]['tool'] == 'krakenuniq'
unknown: true unknown: true
} }
@ -95,7 +97,7 @@ workflow PROFILING {
db: it[2] db: it[2]
} }
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db )
ch_maltrun_for_megan = MALT_RUN.out.rma6 ch_maltrun_for_megan = MALT_RUN.out.rma6
.transpose() .transpose()
@ -265,6 +267,28 @@ workflow PROFILING {
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log ) ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
} }
if ( params.run_krakenuniq ) {
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
.map {
meta, reads, db_meta, db ->
[[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db]
}
.groupTuple(by: [0,2,3])
.dump(tag: "krakenuniq_premultimap")
.multiMap {
single_meta, reads, db_meta, db ->
reads: [ single_meta + db_meta, reads.flatten() ]
db: db
}
// Hardcode to _always_ produce the report file (which is our basic output, and goes into ch_multiqc_files and ch_raw_profiles)
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )
ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
}
emit: emit:
classifications = ch_raw_classifications classifications = ch_raw_classifications
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom