mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-12-22 15:08:17 +00:00
Merge branch 'dev' into update_motus
This commit is contained in:
commit
6d140c683a
20 changed files with 518 additions and 22 deletions
33
.github/workflows/ci.yml
vendored
33
.github/workflows/ci.yml
vendored
|
@ -101,3 +101,36 @@ jobs:
|
|||
with:
|
||||
command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
|
||||
attempt_limit: 3
|
||||
|
||||
krakenuniq:
|
||||
name: Test KrakenUniq with workflow parameters
|
||||
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
NXF_VER:
|
||||
- "21.10.3"
|
||||
- "latest-everything"
|
||||
|
||||
steps:
|
||||
- name: Check out pipeline code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install Nextflow
|
||||
uses: nf-core/setup-nextflow@v1
|
||||
with:
|
||||
version: "${{ matrix.NXF_VER }}"
|
||||
|
||||
- name: Show current locale
|
||||
run: locale
|
||||
|
||||
- name: Set UTF-8 enabled locale
|
||||
run: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale LANG=en_US.UTF-8
|
||||
|
||||
- name: Run pipeline with test data
|
||||
uses: Wandalen/wretry.action@v1.0.11
|
||||
with:
|
||||
command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results
|
||||
attempt_limit: 3
|
||||
|
|
|
@ -36,6 +36,10 @@
|
|||
|
||||
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
|
||||
|
||||
- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0)
|
||||
|
||||
> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
|
||||
|
||||
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
|
||||
|
||||
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
|
||||
|
|
|
@ -281,7 +281,7 @@ process {
|
|||
}
|
||||
|
||||
withName: MALT_RUN {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.args = { "${meta.db_params} -m ${params.malt_mode}" }
|
||||
// one run with multiple samples, so fix ID to just db name to ensure clean log name
|
||||
ext.prefix = { "${meta.db_name}" }
|
||||
publishDir = [
|
||||
|
@ -330,6 +330,17 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
// one run with multiple samples, so fix ID to just db name to ensure clean log name
|
||||
ext.prefix = { "${meta.db_name}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/krakenuniq/${meta.db_name}/" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{txt,report,fastq.gz}'
|
||||
]
|
||||
}
|
||||
|
||||
withName: KRONA_CLEANUP {
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
publishDir = [
|
||||
|
|
|
@ -39,6 +39,7 @@ params {
|
|||
run_metaphlan3 = true
|
||||
run_centrifuge = true
|
||||
run_diamond = true
|
||||
run_krakenuniq = true
|
||||
run_motus = false
|
||||
run_krona = true
|
||||
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
|
||||
|
@ -51,6 +52,7 @@ params {
|
|||
process {
|
||||
withName: MALT_RUN {
|
||||
maxForks = 1
|
||||
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
|
||||
}
|
||||
withName: MEGAN_RMA2INFO_TSV {
|
||||
maxForks = 1
|
||||
|
|
72
conf/test_krakenuniq.config
Normal file
72
conf/test_krakenuniq.config
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Nextflow config file for running minimal tests
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Defines input files and everything required to run a fast and simple pipeline test.
|
||||
|
||||
Use as follows:
|
||||
nextflow run nf-core/taxprofiler -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR>
|
||||
|
||||
----------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//
|
||||
// Separate test as KrakenUniq database can sometimes be too big for GHA
|
||||
//
|
||||
|
||||
params {
|
||||
config_profile_name = 'Test profile'
|
||||
config_profile_description = 'Minimal test to check KrakenUniq function'
|
||||
|
||||
// Limit resources so that this can run on GitHub Actions
|
||||
max_cpus = 2
|
||||
max_memory = '6.GB'
|
||||
max_time = '6.h'
|
||||
|
||||
// Input data
|
||||
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
|
||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv'
|
||||
perform_shortread_qc = true
|
||||
perform_longread_qc = true
|
||||
shortread_qc_mergepairs = true
|
||||
perform_shortread_complexityfilter = true
|
||||
perform_shortread_hostremoval = true
|
||||
perform_longread_hostremoval = true
|
||||
perform_runmerging = true
|
||||
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
|
||||
run_kaiju = false
|
||||
run_kraken2 = false
|
||||
run_bracken = false
|
||||
run_malt = false
|
||||
run_metaphlan3 = false
|
||||
run_centrifuge = false
|
||||
run_diamond = false
|
||||
run_krakenuniq = true
|
||||
run_motus = false
|
||||
run_krona = true
|
||||
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
|
||||
malt_save_reads = true
|
||||
kraken2_save_reads = true
|
||||
centrifuge_save_reads = true
|
||||
diamond_save_reads = true
|
||||
}
|
||||
|
||||
process {
|
||||
withName: MALT_RUN {
|
||||
maxForks = 1
|
||||
}
|
||||
withName: MEGAN_RMA2INFO_TSV {
|
||||
maxForks = 1
|
||||
}
|
||||
withName: MEGAN_RMA2INFO_KRONA {
|
||||
maxForks = 1
|
||||
}
|
||||
withName: 'EIDO_VALIDATE' {
|
||||
ext.args = '--st-index sample'
|
||||
}
|
||||
withName: 'EIDO_CONVERT' {
|
||||
ext.args = '--st-index sample'
|
||||
}
|
||||
}
|
|
@ -10,6 +10,10 @@
|
|||
----------------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
//
|
||||
// Separate test as mOTUs database download can be flaky
|
||||
//
|
||||
|
||||
params {
|
||||
config_profile_name = 'mOTUs Test profile'
|
||||
config_profile_description = 'Minimal test to check mOTUs function'
|
||||
|
@ -38,6 +42,7 @@ params {
|
|||
run_metaphlan3 = false
|
||||
run_centrifuge = false
|
||||
run_diamond = false
|
||||
run_krakenuniq = false
|
||||
run_motus = true
|
||||
motus_save_mgc_read_counts = true
|
||||
motus_remove_ncbi_ids = true
|
||||
|
|
|
@ -38,6 +38,7 @@ params {
|
|||
run_metaphlan3 = true
|
||||
run_centrifuge = true
|
||||
run_diamond = true
|
||||
run_krakenuniq = true
|
||||
run_motus = false
|
||||
run_krona = true
|
||||
}
|
||||
|
@ -45,5 +46,6 @@ params {
|
|||
process {
|
||||
withName: MALT_RUN {
|
||||
maxForks = 1
|
||||
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,7 @@ params {
|
|||
run_metaphlan3 = false
|
||||
run_centrifuge = false
|
||||
run_diamond = false
|
||||
run_krakenuniq = false
|
||||
run_motus = false
|
||||
}
|
||||
|
||||
|
|
|
@ -38,11 +38,13 @@ params {
|
|||
run_metaphlan3 = false
|
||||
run_centrifuge = false
|
||||
run_diamond = false
|
||||
run_krakenuniq = false
|
||||
run_motus = false
|
||||
}
|
||||
|
||||
process {
|
||||
withName: MALT_RUN {
|
||||
maxForks = 1
|
||||
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ params {
|
|||
process {
|
||||
withName: MALT_RUN {
|
||||
maxForks = 1
|
||||
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
|
||||
}
|
||||
withName: MEGAN_RMA2INFO {
|
||||
maxForks = 1
|
||||
|
|
|
@ -74,7 +74,7 @@ The pipeline takes the locations and specific profiling parameters of the tool o
|
|||
|
||||
> ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information on the expected database files.
|
||||
|
||||
An example database sheet can look as follows, where 4 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
|
||||
An example database sheet can look as follows, where 5 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
|
||||
|
||||
```console
|
||||
tool,db_name,db_params,db_path
|
||||
|
@ -82,6 +82,7 @@ malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
|
|||
malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
|
||||
bracken,db1,,/<path>/<to>/bracken/testdb-bracken.tar.gz
|
||||
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
|
||||
krakenuniq,db3,,/<path>/<to>/krakenuniq/testdb-krakenuniq.tar.gz
|
||||
centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
|
||||
metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
|
||||
motus,db_mOTU,,/<path>/<to>/motus/motus_database/
|
||||
|
@ -125,6 +126,12 @@ Expected (uncompressed) database files for each tool are as follows:
|
|||
- `database100mers.kraken`
|
||||
- `database150mers.kmer_distrib`
|
||||
- `database150mers.kraken`
|
||||
- **KrakenUniq** output of `krakenuniq-build` command(s). A directory containing:
|
||||
- `opts.k2d`
|
||||
- `hash.k2d`
|
||||
- `taxo.k2d`
|
||||
- `database.idx`
|
||||
- `taxDB`
|
||||
- **Centrifuge** output of `centrifuge-build`. A directory containing:
|
||||
- `<database_name>.<number>.cf`
|
||||
- `<database_name>.<number>.cf`
|
||||
|
@ -177,7 +184,7 @@ work # Directory containing the nextflow working files
|
|||
|
||||
### Sequencing quality control
|
||||
|
||||
nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco] as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
|
||||
nf-core taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
|
||||
|
||||
### Preprocessing Steps
|
||||
|
||||
|
@ -442,7 +449,7 @@ NXF_OPTS='-Xms1g -Xmx4g'
|
|||
|
||||
## Troubleshooting and FAQs
|
||||
|
||||
### I get a warning during centrifuge_kreport process with exit status 255.
|
||||
### I get a warning during centrifuge_kreport process with exit status 255
|
||||
|
||||
When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.
|
||||
|
||||
|
|
|
@ -12,9 +12,9 @@ class WorkflowMain {
|
|||
// TODO nf-core: Add Zenodo DOI for pipeline after first release
|
||||
//"* The pipeline\n" +
|
||||
//" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" +
|
||||
"* The nf-core framework\n" +
|
||||
" https://doi.org/10.1038/s41587-020-0439-x\n\n" +
|
||||
"* Software dependencies\n" +
|
||||
'* The nf-core framework\n' +
|
||||
' https://doi.org/10.1038/s41587-020-0439-x\n\n' +
|
||||
'* Software dependencies\n' +
|
||||
" https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
|
||||
}
|
||||
|
||||
|
@ -53,15 +53,15 @@ class WorkflowMain {
|
|||
System.exit(0)
|
||||
}
|
||||
|
||||
// Print parameter summary log to screen
|
||||
|
||||
log.info paramsSummaryLog(workflow, params, log)
|
||||
|
||||
// Validate workflow parameters via the JSON schema
|
||||
if (params.validate_params) {
|
||||
NfcoreSchema.validateParameters(workflow, params, log)
|
||||
}
|
||||
|
||||
// Print parameter summary log to screen
|
||||
|
||||
log.info paramsSummaryLog(workflow, params, log)
|
||||
|
||||
// Check that a -profile or Nextflow config has been provided to run the pipeline
|
||||
NfcoreTemplate.checkConfigProvided(workflow, log)
|
||||
|
||||
|
@ -90,4 +90,5 @@ class WorkflowMain {
|
|||
}
|
||||
return null
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -97,6 +97,10 @@
|
|||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
},
|
||||
"krakenuniq/preloadedkrakenuniq": {
|
||||
"branch": "master",
|
||||
"git_sha": "05649975c6611c6e007537a7984e186e12ae03af"
|
||||
},
|
||||
"krona/ktimporttaxonomy": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
|
@ -107,7 +111,7 @@
|
|||
},
|
||||
"malt/run": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
"git_sha": "6d9712f03ec2de8264a50ee4541a617e1e063b51"
|
||||
},
|
||||
"megan/rma2info": {
|
||||
"branch": "master",
|
||||
|
|
224
modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
generated
Normal file
224
modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
generated
Normal file
|
@ -0,0 +1,224 @@
|
|||
process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
|
||||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0':
|
||||
'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(fastqs)
|
||||
path db
|
||||
val ram_chunk_size
|
||||
val save_output_fastqs
|
||||
val report_file
|
||||
val save_output
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq
|
||||
tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq
|
||||
tuple val(meta), path('*classified.txt') , optional:true, emit: classified_assignment
|
||||
tuple val(meta), path('*report.txt') , emit: report
|
||||
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def args2 = task.ext.args ?: ''
|
||||
|
||||
def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"'
|
||||
def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
|
||||
def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
|
||||
def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
|
||||
def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
|
||||
def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
|
||||
def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
|
||||
if (meta.single_end) {
|
||||
"""
|
||||
krakenuniq \\
|
||||
--db $db \\
|
||||
--preload \\
|
||||
--preload-size $ram_chunk_size \\
|
||||
--threads $task.cpus \\
|
||||
$args
|
||||
|
||||
strip_suffix() {
|
||||
local result=\$1
|
||||
# Strip any file extensions.
|
||||
echo "\${result%%.*}"
|
||||
}
|
||||
|
||||
printf "%s\\n" ${fastqs} | while read FASTQ; do \\
|
||||
PREFIX="\$(strip_suffix "\${FASTQ}")"
|
||||
|
||||
krakenuniq \\
|
||||
--db $db \\
|
||||
--threads $task.cpus \\
|
||||
$report \\
|
||||
$output_option \\
|
||||
$unclassified_option \\
|
||||
$classified_option \\
|
||||
$output_option \\
|
||||
$args2 \\
|
||||
"\${FASTQ}"
|
||||
done
|
||||
|
||||
$compress_reads_command
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
} else {
|
||||
"""
|
||||
krakenuniq \\
|
||||
--db $db \\
|
||||
--preload \\
|
||||
--preload-size $ram_chunk_size \\
|
||||
--threads $task.cpus \\
|
||||
$args
|
||||
|
||||
strip_suffix() {
|
||||
local result
|
||||
read result
|
||||
# Strip any trailing dot or underscore.
|
||||
result="\${result%_}"
|
||||
echo "\${result%.}"
|
||||
}
|
||||
|
||||
printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
|
||||
read -r -a FASTQ <<< "\${FASTQ}"
|
||||
PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
|
||||
|
||||
krakenuniq \\
|
||||
--db $db \\
|
||||
--threads $task.cpus \\
|
||||
$report \\
|
||||
$output_option \\
|
||||
$unclassified_option \\
|
||||
$classified_option \\
|
||||
$output_option \\
|
||||
--paired \\
|
||||
$args2 \\
|
||||
"\${FASTQ[@]}"
|
||||
done
|
||||
|
||||
$compress_reads_command
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
|
||||
stub:
|
||||
def args = task.ext.args ?: ''
|
||||
def args2 = task.ext.args ?: ''
|
||||
|
||||
def classified = meta.single_end ? '"\${PREFIX}.classified.fastq"' : '"\${PREFIX}.classified#.fastq"'
|
||||
def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
|
||||
def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
|
||||
def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
|
||||
def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
|
||||
def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
|
||||
def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
|
||||
if (meta.single_end) {
|
||||
"""
|
||||
echo krakenuniq \\
|
||||
--db $db \\
|
||||
--preload \\
|
||||
--preload-size $ram_chunk_size \\
|
||||
--threads $task.cpus \\
|
||||
$args
|
||||
|
||||
strip_suffix() {
|
||||
local result=\$1
|
||||
# Strip any file extensions.
|
||||
echo "\${result%%.*}"
|
||||
}
|
||||
|
||||
printf "%s\\n" ${fastqs} | while read FASTQ; do \\
|
||||
echo "\${FASTQ}"
|
||||
PREFIX="\$(strip_suffix "\${FASTQ}")"
|
||||
echo "\${PREFIX}"
|
||||
|
||||
echo krakenuniq \\
|
||||
--db $db \\
|
||||
--threads $task.cpus \\
|
||||
$report \\
|
||||
$output_option \\
|
||||
$unclassified_option \\
|
||||
$classified_option \\
|
||||
$output_option \\
|
||||
$args2 \\
|
||||
"\${FASTQ}"
|
||||
|
||||
touch "\${PREFIX}.classified.fastq.gz"
|
||||
touch "\${PREFIX}.krakenuniq.classified.txt"
|
||||
touch "\${PREFIX}.krakenuniq.report.txt"
|
||||
touch "\${PREFIX}.unclassified.fastq.gz"
|
||||
done
|
||||
|
||||
echo $compress_reads_command
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
} else {
|
||||
"""
|
||||
echo krakenuniq \\
|
||||
--db $db \\
|
||||
--preload \\
|
||||
--preload-size $ram_chunk_size \\
|
||||
--threads $task.cpus \\
|
||||
$args
|
||||
|
||||
strip_suffix() {
|
||||
local result
|
||||
read result
|
||||
# Strip any trailing dot or underscore.
|
||||
result="\${result%_}"
|
||||
echo "\${result%.}"
|
||||
}
|
||||
|
||||
printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
|
||||
read -r -a FASTQ <<< "\${FASTQ}"
|
||||
echo "\${FASTQ[@]}"
|
||||
PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
|
||||
echo "\${PREFIX}"
|
||||
|
||||
echo krakenuniq \\
|
||||
--db $db \\
|
||||
--threads $task.cpus \\
|
||||
$report \\
|
||||
$output_option \\
|
||||
$unclassified_option \\
|
||||
$classified_option \\
|
||||
$output_option \\
|
||||
--paired \\
|
||||
$args2 \\
|
||||
"\${FASTQ[@]}"
|
||||
|
||||
touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz"
|
||||
touch "\${PREFIX}.krakenuniq.classified.txt"
|
||||
touch "\${PREFIX}.krakenuniq.report.txt"
|
||||
touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz"
|
||||
done
|
||||
|
||||
echo $compress_reads_command
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
}
|
78
modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
generated
Normal file
78
modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
generated
Normal file
|
@ -0,0 +1,78 @@
|
|||
name: "krakenuniq_preloadedkrakenuniq"
|
||||
description: Classifies metagenomic sequence data using unique k-mer counts
|
||||
keywords:
|
||||
- classify
|
||||
- metagenomics
|
||||
- kmers
|
||||
- fastq
|
||||
- db
|
||||
tools:
|
||||
- "krakenuniq":
|
||||
description: "Metagenomics classifier with unique k-mer counting for more specific results"
|
||||
homepage: https://github.com/fbreitwieser/krakenuniq
|
||||
documentation: https://github.com/fbreitwieser/krakenuniq
|
||||
doi: 10.1186/s13059-018-1568-0
|
||||
licence: ["MIT"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- fastqs:
|
||||
type: file
|
||||
description: List of input FastQ files
|
||||
- db:
|
||||
type: directory
|
||||
description: KrakenUniq database
|
||||
- ram_chunk_size:
|
||||
type: val
|
||||
description: Maximum amount of RAM to use for each chunk of the database that is loaded at any one time
|
||||
pattern: "*GB"
|
||||
- save_output_fastqs:
|
||||
type: boolean
|
||||
description: |
|
||||
If true, optional commands are added to save classified and unclassified reads
|
||||
as fastq files
|
||||
- save_reads_assignment:
|
||||
type: boolean
|
||||
description: |
|
||||
If true, an optional command is added to save a file reporting the taxonomic
|
||||
classification of each input read
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- classified_reads_fastq:
|
||||
type: file
|
||||
description: |
|
||||
Reads classified as belonging to any of the taxa
|
||||
on the KrakenUniq database.
|
||||
pattern: "*.fastq.gz"
|
||||
- unclassified_reads_fastq:
|
||||
type: file
|
||||
description: |
|
||||
Reads not classified to any of the taxa
|
||||
on the KrakenUniq database.
|
||||
pattern: "*.fastq.gz"
|
||||
- classified_assignment:
|
||||
type: file
|
||||
description: |
|
||||
KrakenUniq output file indicating the taxonomic assignment of
|
||||
each input read ## DOUBLE CHECK!!
|
||||
- report:
|
||||
type: file
|
||||
description: |
|
||||
KrakenUniq report containing stats about classified
|
||||
and unclassified reads.
|
||||
pattern: "*.report.txt"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
authors:
|
||||
- "@mjamy"
|
||||
- "@Midnighter"
|
8
modules/nf-core/malt/run/main.nf
generated
8
modules/nf-core/malt/run/main.nf
generated
|
@ -2,14 +2,13 @@ process MALT_RUN {
|
|||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
|
||||
conda (params.enable_conda ? "bioconda::malt=0.61" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
|
||||
'quay.io/biocontainers/malt:0.41--1' }"
|
||||
'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' :
|
||||
'quay.io/biocontainers/malt:0.61--hdfd78af_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(fastqs)
|
||||
val mode
|
||||
path index
|
||||
|
||||
output:
|
||||
|
@ -38,7 +37,6 @@ process MALT_RUN {
|
|||
-o . \\
|
||||
$args \\
|
||||
--inFile ${fastqs.join(' ')} \\
|
||||
-m $mode \\
|
||||
--index $index/ |&tee ${prefix}-malt-run.log
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
4
modules/nf-core/malt/run/meta.yml
generated
4
modules/nf-core/malt/run/meta.yml
generated
|
@ -28,10 +28,6 @@ input:
|
|||
type: file
|
||||
description: Input FASTQ files
|
||||
pattern: "*.{fastq.gz,fq.gz}"
|
||||
- mode:
|
||||
type: string
|
||||
description: Program mode
|
||||
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
|
||||
- index:
|
||||
type: directory
|
||||
description: Index/database directory from malt-build
|
||||
|
|
|
@ -119,6 +119,12 @@ params {
|
|||
kraken2_save_readclassification = false // added directly to module in profiling.nf
|
||||
kraken2_save_minimizers = false
|
||||
|
||||
//krakenuniq
|
||||
run_krakenuniq = false
|
||||
krakenuniq_ram_chunk_size = '16G'
|
||||
krakenuniq_save_reads = false // added directly to module in profiling.nf
|
||||
krakenuniq_save_readclassifications = false // added directly to module in profiling.nf
|
||||
|
||||
// Bracken
|
||||
run_bracken = false
|
||||
|
||||
|
@ -239,6 +245,7 @@ profiles {
|
|||
test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
|
||||
test_nothing { includeConfig 'conf/test_nothing.config' }
|
||||
test_motus { includeConfig 'conf/test_motus.config' }
|
||||
test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' }
|
||||
test_pep { includeConfig 'conf/test_pep.config' }
|
||||
}
|
||||
|
||||
|
|
|
@ -402,6 +402,30 @@
|
|||
"fa_icon": "fas fa-save",
|
||||
"help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\nAdds `--report-minimizer-data` to the kraken2 command."
|
||||
},
|
||||
"run_krakenuniq": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-toggle-on",
|
||||
"description": "Turn on profiling with KrakenUniq. Requires database to be present in the CSV file passed to --databases"
|
||||
},
|
||||
"krakenuniq_save_reads": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Turn on saving of KrakenUniq-aligned reads",
|
||||
"help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`"
|
||||
},
|
||||
"krakenuniq_ram_chunk_size": {
|
||||
"type": "string",
|
||||
"default": "16G",
|
||||
"description": "Specify how large to chunk database when loading into memory for KrakenUniq",
|
||||
"fa_icon": "fas fa-database",
|
||||
"help_text": "nf-core/taxprofiler utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preload` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, and this is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload-size\n\n> \n\n"
|
||||
},
|
||||
"krakenuniq_save_readclassifications": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-save",
|
||||
"description": "Turn on saving of KrakenUniq per-read taxonomic assignment file",
|
||||
"help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that read received.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`"
|
||||
},
|
||||
"run_bracken": {
|
||||
"type": "boolean",
|
||||
"description": "Post-process kraken2 reports with Bracken.",
|
||||
|
|
|
@ -13,6 +13,7 @@ include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/me
|
|||
include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main'
|
||||
include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main'
|
||||
include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main'
|
||||
include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
|
||||
|
||||
workflow PROFILING {
|
||||
take:
|
||||
|
@ -47,6 +48,7 @@ workflow PROFILING {
|
|||
kaiju: it[2]['tool'] == 'kaiju'
|
||||
diamond: it[2]['tool'] == 'diamond'
|
||||
motus: it[2]['tool'] == 'motus'
|
||||
krakenuniq: it[2]['tool'] == 'krakenuniq'
|
||||
unknown: true
|
||||
}
|
||||
|
||||
|
@ -95,7 +97,7 @@ workflow PROFILING {
|
|||
db: it[2]
|
||||
}
|
||||
|
||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||
MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db )
|
||||
|
||||
ch_maltrun_for_megan = MALT_RUN.out.rma6
|
||||
.transpose()
|
||||
|
@ -265,6 +267,28 @@ workflow PROFILING {
|
|||
ch_multiqc_files = ch_multiqc_files.mix( MOTUS_PROFILE.out.log )
|
||||
}
|
||||
|
||||
if ( params.run_krakenuniq ) {
|
||||
ch_input_for_krakenuniq = ch_input_for_profiling.krakenuniq
|
||||
.map {
|
||||
meta, reads, db_meta, db ->
|
||||
[[id: db_meta.db_name, single_end: meta.single_end], reads, db_meta, db]
|
||||
}
|
||||
.groupTuple(by: [0,2,3])
|
||||
.dump(tag: "krakenuniq_premultimap")
|
||||
.multiMap {
|
||||
single_meta, reads, db_meta, db ->
|
||||
reads: [ single_meta + db_meta, reads.flatten() ]
|
||||
db: db
|
||||
}
|
||||
// Hardcode to _always_ produce the report file (which is our basic output, and goes into MultiQC and the raw profiles)
|
||||
KRAKENUNIQ_PRELOADEDKRAKENUNIQ ( ch_input_for_krakenuniq.reads.dump(tag: "krakenuniq_input"), ch_input_for_krakenuniq.db.dump(tag: "krakenuniq_db"), params.krakenuniq_ram_chunk_size, params.krakenuniq_save_reads, true, params.krakenuniq_save_readclassifications )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
|
||||
ch_versions = ch_versions.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.versions.first() )
|
||||
ch_raw_classifications = ch_raw_classifications.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.classified_assignment )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KRAKENUNIQ_PRELOADEDKRAKENUNIQ.out.report )
|
||||
|
||||
}
|
||||
|
||||
emit:
|
||||
classifications = ch_raw_classifications
|
||||
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
|
||||
|
|
Loading…
Reference in a new issue