mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 23:49:55 +00:00
Merge branch 'dev' into hostremoval-publish-fix
This commit is contained in:
commit
cd3c8f6a89
12 changed files with 159 additions and 17 deletions
|
@ -44,6 +44,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
||||||
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
|
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
|
||||||
- [Kaiju](https://kaiju.binf.ku.dk/)
|
- [Kaiju](https://kaiju.binf.ku.dk/)
|
||||||
- [mOTUs](https://motu-tool.org/)
|
- [mOTUs](https://motu-tool.org/)
|
||||||
|
- [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
|
||||||
4. Perform optional post-processing with:
|
4. Perform optional post-processing with:
|
||||||
- [bracken](https://ccb.jhu.edu/software/bracken/)
|
- [bracken](https://ccb.jhu.edu/software/bracken/)
|
||||||
5. Standardises output tables
|
5. Standardises output tables
|
||||||
|
|
|
@ -251,7 +251,7 @@ process {
|
||||||
pattern: '*.txt'
|
pattern: '*.txt'
|
||||||
]
|
]
|
||||||
ext.args = { "${meta.db_params}" }
|
ext.args = { "${meta.db_params}" }
|
||||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
}
|
}
|
||||||
|
|
||||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||||
|
@ -270,4 +270,13 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: KAIJU_KAIJU {
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/kaiju/${meta.db_name}" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.tsv'
|
||||||
|
]
|
||||||
|
ext.args = { "${meta.db_params}" }
|
||||||
|
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,15 +22,16 @@ params {
|
||||||
// Input data
|
// Input data
|
||||||
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
|
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
|
||||||
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
// TODO nf-core: Give any required params for the test so that command line flags are not needed
|
||||||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
|
||||||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||||
run_kraken2 = true
|
|
||||||
run_malt = true
|
|
||||||
run_metaphlan3 = true
|
|
||||||
run_centrifuge = true
|
|
||||||
perform_shortread_clipmerge = true
|
perform_shortread_clipmerge = true
|
||||||
perform_longread_clip = false
|
perform_longread_clip = false
|
||||||
perform_shortread_complexityfilter = true
|
perform_shortread_complexityfilter = true
|
||||||
perform_shortread_hostremoval = true
|
perform_shortread_hostremoval = true
|
||||||
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
|
shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
|
||||||
|
run_kaiju = true
|
||||||
|
run_kraken2 = true
|
||||||
|
run_malt = true
|
||||||
|
run_metaphlan3 = true
|
||||||
|
run_centrifuge = true
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,6 +124,10 @@ Expected (uncompressed) database files for each tool are as follows:
|
||||||
- `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
|
- `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
|
||||||
- `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
|
- `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
|
||||||
- `mpa_latest`
|
- `mpa_latest`
|
||||||
|
- **Kaiju** output of `kaiju-makedb`. A directory containing:
|
||||||
|
- `kaiju_db_*.fmi`
|
||||||
|
- `nodes.dmp`
|
||||||
|
- `names.dmp`
|
||||||
|
|
||||||
## Running the pipeline
|
## Running the pipeline
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,9 @@
|
||||||
},
|
},
|
||||||
"untar": {
|
"untar": {
|
||||||
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
||||||
|
},
|
||||||
|
"kaiju/kaiju": {
|
||||||
|
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
41
modules/nf-core/modules/kaiju/kaiju/main.nf
generated
Normal file
41
modules/nf-core/modules/kaiju/kaiju/main.nf
generated
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
// Nextflow process: classify one sample's reads with `kaiju` against a database,
// writing `<prefix>.tsv` and a `versions.yml` that records the kaiju version.
process KAIJU_KAIJU {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_high'
|
||||||
|
|
||||||
|
// Software environment: the bioconda package when params.enable_conda is set; otherwise a
// container image (the Galaxy depot URL when the engine is singularity and
// task.ext.singularity_pull_docker_container is not set, else the quay.io image).
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
|
||||||
|
'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
// meta: sample map (meta.id and meta.single_end are read below); reads: the sample's read file(s)
tuple val(meta), path(reads)
|
||||||
|
// db: path(s) searched with `find` in the script for the `*nodes.dmp` and `*.fmi` files
path(db)
|
||||||
|
|
||||||
|
output:
|
||||||
|
// results: every `*.tsv` produced by the task, paired with meta
tuple val(meta), path('*.tsv'), emit: results
|
||||||
|
// versions: the versions.yml written at the end of the script
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
// Runs unless task.ext.when is defined and evaluates to false.
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
// Optional extra kaiju command-line flags supplied through task.ext.args.
def args = task.ext.args ?: ''
|
||||||
|
// Output file stem; falls back to meta.id when task.ext.prefix is unset.
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
// Single-end: the read file goes to -i; otherwise reads[0] goes to -i and reads[1] to -j.
def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
|
||||||
|
"""
|
||||||
|
dbnodes=`find -L ${db} -name "*nodes.dmp"`
|
||||||
|
dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
|
||||||
|
kaiju \\
|
||||||
|
$args \\
|
||||||
|
-z $task.cpus \\
|
||||||
|
-t \$dbnodes \\
|
||||||
|
-f \$dbname \\
|
||||||
|
-o ${prefix}.tsv \\
|
||||||
|
$input
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
53
modules/nf-core/modules/kaiju/kaiju/meta.yml
generated
Normal file
53
modules/nf-core/modules/kaiju/kaiju/meta.yml
generated
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
name: kaiju_kaiju
|
||||||
|
description: Taxonomic classification of metagenomic sequence data using a protein reference database
|
||||||
|
keywords:
|
||||||
|
- classify
|
||||||
|
- metagenomics
|
||||||
|
- fastq
|
||||||
|
- taxonomic profiling
|
||||||
|
tools:
|
||||||
|
- kaiju:
|
||||||
|
description: Fast and sensitive taxonomic classification for metagenomics
|
||||||
|
homepage: https://kaiju.binf.ku.dk/
|
||||||
|
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
|
||||||
|
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
|
||||||
|
doi: "10.1038/ncomms11257"
|
||||||
|
licence: ["GNU GPL v3"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
|
||||||
|
- db:
|
||||||
|
type: files
|
||||||
|
description: |
|
||||||
|
List containing the database and nodes files for Kaiju
|
||||||
|
e.g. [ 'database.fmi', 'nodes.dmp' ]
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- results:
|
||||||
|
type: file
|
||||||
|
description: Results with taxonomic classification of each read
|
||||||
|
pattern: "*.tsv"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@talnor"
|
||||||
|
- "@sofstam"
|
||||||
|
- "@jfy133"
|
|
@ -104,6 +104,9 @@ params {
|
||||||
|
|
||||||
// metaphlan3
|
// metaphlan3
|
||||||
run_metaphlan3 = false
|
run_metaphlan3 = false
|
||||||
|
|
||||||
|
// kaiju
|
||||||
|
run_kaiju = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load base.config by default for all pipelines
|
// Load base.config by default for all pipelines
|
||||||
|
|
|
@ -379,6 +379,9 @@
|
||||||
"save_hostremoval_unmapped": {
|
"save_hostremoval_unmapped": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
|
"run_kaiju": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"malt_generatemegansummary": {
|
"malt_generatemegansummary": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,7 @@ def create_fastq_channel(LinkedHashMap row) {
|
||||||
meta.run_accession = row.run_accession
|
meta.run_accession = row.run_accession
|
||||||
meta.instrument_platform = row.instrument_platform
|
meta.instrument_platform = row.instrument_platform
|
||||||
meta.single_end = row.single_end.toBoolean()
|
meta.single_end = row.single_end.toBoolean()
|
||||||
|
meta.is_fasta = false
|
||||||
|
|
||||||
// add path(s) of the fastq file(s) to the meta map
|
// add path(s) of the fastq file(s) to the meta map
|
||||||
def fastq_meta = []
|
def fastq_meta = []
|
||||||
|
@ -75,6 +76,7 @@ def create_fasta_channel(LinkedHashMap row) {
|
||||||
meta.run_accession = row.run_accession
|
meta.run_accession = row.run_accession
|
||||||
meta.instrument_platform = row.instrument_platform
|
meta.instrument_platform = row.instrument_platform
|
||||||
meta.single_end = true
|
meta.single_end = true
|
||||||
|
meta.is_fasta = true
|
||||||
|
|
||||||
def array = []
|
def array = []
|
||||||
if (!file(row.fasta).exists()) {
|
if (!file(row.fasta).exists()) {
|
||||||
|
|
|
@ -7,6 +7,7 @@ include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/mega
|
||||||
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
|
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||||
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
|
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
|
||||||
|
include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main'
|
||||||
|
|
||||||
workflow PROFILING {
|
workflow PROFILING {
|
||||||
take:
|
take:
|
||||||
|
@ -37,6 +38,7 @@ workflow PROFILING {
|
||||||
kraken2: it[2]['tool'] == 'kraken2'
|
kraken2: it[2]['tool'] == 'kraken2'
|
||||||
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
||||||
centrifuge: it[2]['tool'] == 'centrifuge'
|
centrifuge: it[2]['tool'] == 'centrifuge'
|
||||||
|
kaiju: it[2]['tool'] == 'kaiju'
|
||||||
unknown: true
|
unknown: true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,13 +79,28 @@ workflow PROFILING {
|
||||||
}
|
}
|
||||||
|
|
||||||
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
|
ch_input_for_centrifuge = ch_input_for_profiling.centrifuge
|
||||||
.multiMap {
|
.filter{
|
||||||
it ->
|
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
|
||||||
reads: [ it[0] + it[2], it[1] ]
|
!it[0].is_fasta
|
||||||
db: it[3]
|
}
|
||||||
}
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [ it[0] + it[2], it[1] ]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||||
|
.filter{
|
||||||
|
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
|
||||||
|
!it[0].is_fasta
|
||||||
|
}
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [it[0] + it[2], it[1]]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
ch_input_for_kaiju = ch_input_for_profiling.kaiju
|
||||||
.multiMap {
|
.multiMap {
|
||||||
it ->
|
it ->
|
||||||
reads: [it[0] + it[2], it[1]]
|
reads: [it[0] + it[2], it[1]]
|
||||||
|
@ -135,6 +152,10 @@ workflow PROFILING {
|
||||||
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
|
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( params.run_kaiju ) {
|
||||||
|
KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db )
|
||||||
|
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
|
||||||
|
}
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
|
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
|
||||||
|
|
|
@ -19,11 +19,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
|
||||||
// Check mandatory parameters
|
// Check mandatory parameters
|
||||||
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
|
||||||
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
|
||||||
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
// Warn when MALT is requested while pair merging is OFF: only then do uncollapsed pairs reach MALT and get profiled as separate files.
if (!params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
|
||||||
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
|
// Excluding unmerged reads only makes sense when pair merging is enabled; abort early otherwise.
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot exclude unmerged reads when merging is not turned on. Please specify --shortread_clipmerge_mergepairs"
|
||||||
|
|
||||||
if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
|
// Host removal needs a reference FASTA; the message names the flag that is actually tested (perform_shortread_hostremoval).
if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
|
||||||
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
|
// A pre-built index without its reference FASTA is rejected. The parameter checked is shortread_hostremoval_index, matching the flag named in the message.
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
|
||||||
|
|
||||||
if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) }
|
if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) }
|
||||||
if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }
|
if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }
|
||||||
|
@ -175,10 +175,11 @@ workflow TAXPROFILER {
|
||||||
meta, reads ->
|
meta, reads ->
|
||||||
[ meta, [ reads ].flatten() ]
|
[ meta, [ reads ].flatten() ]
|
||||||
}
|
}
|
||||||
|
.mix( INPUT_CHECK.out.fasta )
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ch_reads_runmerged = ch_shortreads_hostremoved
|
ch_reads_runmerged = ch_shortreads_hostremoved
|
||||||
.mix( ch_longreads_preprocessed )
|
.mix( ch_longreads_preprocessed, INPUT_CHECK.out.fasta )
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in a new issue