mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-21 16:36:03 +00:00
Add Kraken2 and MALT/run as Proof of Concept (currently MQC issue)
This commit is contained in:
parent
278f5605ca
commit
2c183ed2ed
8 changed files with 315 additions and 7 deletions
|
@ -68,6 +68,26 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: MALT_RUN {
    // The process is only executed when params.run_malt evaluates to true.
    ext.when = params.run_malt

    // Extra command-line arguments are read from the meta map's db_params entry.
    ext.args = { "${meta.db_params}" }

    // Copy MALT result files into a sub-directory named after the database.
    publishDir = [
        pattern: '*.{rma6,tab,text,sam,log}',
        mode: 'copy',
        path: { "${params.outdir}/malt/${meta.db_name}" }
    ]
}
|
||||
|
||||
withName: KRAKEN2_KRAKEN2 {
    // Copy Kraken2 result files into a sub-directory named after the database.
    publishDir = [
        path: { "${params.outdir}/kraken2/${meta.db_name}" },
        mode: 'copy',
        // The glob needs a leading '*': a bare '.{fastq.gz,txt}' only matches
        // files literally named '.fastq.gz' or '.txt', so nothing was published.
        pattern: '*.{fastq.gz,txt}'
    ]
    // Extra command-line arguments are read from the meta map's db_params entry.
    ext.args = { "${meta.db_params}" }
    // The process is only executed when params.run_kraken2 evaluates to true.
    ext.when = params.run_kraken2
    // Output prefix combines sample id and database name so that results of the
    // same sample against different databases get distinct file names.
    ext.prefix = { "${meta.id}-${meta.db_name}" }
}
|
||||
|
||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
publishDir = [
|
||||
|
|
|
@ -15,6 +15,12 @@
|
|||
"fastqc": {
|
||||
"git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961"
|
||||
},
|
||||
"kraken2/kraken2": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"malt/run": {
|
||||
"git_sha": "76cdd46f3f8a77fb5023fb5a39c4ab99925b8b56"
|
||||
},
|
||||
"multiqc": {
|
||||
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
|
||||
}
|
||||
|
|
49
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
Normal file
49
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Runs kraken2 on one sample's reads against one database, gzips the
// classified/unclassified FASTQ outputs and records tool versions.
process KRAKEN2_KRAKEN2 {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
        'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"

    input:
    tuple val(meta), path(reads)
    path db

    output:
    // The globs are anchored on '.classified' / '.unclassified' followed by '.' or '_'.
    // The previous '*classified*' glob also matched the '*unclassified*' files,
    // so unclassified reads leaked into the 'classified' channel.
    tuple val(meta), path('*.classified{.,_}*')  , emit: classified
    tuple val(meta), path('*.unclassified{.,_}*'), emit: unclassified
    tuple val(meta), path('*report.txt')         , emit: txt
    path "versions.yml"                          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Paired-end input adds --paired and a '#' placeholder in the output names;
    // single-end input writes one file per category.
    def paired       = meta.single_end ? "" : "--paired"
    def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
    def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
    """
    kraken2 \\
        --db $db \\
        --threads $task.cpus \\
        --unclassified-out $unclassified \\
        --classified-out $classified \\
        --report ${prefix}.kraken2.report.txt \\
        --gzip-compressed \\
        $paired \\
        $args \\
        $reads

    pigz -p $task.cpus *.fastq

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
}
|
60
modules/nf-core/modules/kraken2/kraken2/meta.yml
generated
Normal file
60
modules/nf-core/modules/kraken2/kraken2/meta.yml
generated
Normal file
|
@ -0,0 +1,60 @@
|
|||
name: kraken2_kraken2
|
||||
description: Classifies metagenomic sequence data
|
||||
keywords:
|
||||
- classify
|
||||
- metagenomics
|
||||
- fastq
|
||||
- db
|
||||
tools:
|
||||
- kraken2:
|
||||
description: |
|
||||
Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads
|
||||
homepage: https://ccb.jhu.edu/software/kraken2/
|
||||
documentation: https://github.com/DerrickWood/kraken2/wiki/Manual
|
||||
doi: 10.1186/s13059-019-1891-0
|
||||
licence: ["MIT"]
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
- db:
|
||||
type: directory
|
||||
description: Kraken2 database
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- classified:
|
||||
type: file
|
||||
description: |
|
||||
Reads classified to belong to any of the taxa
|
||||
on the Kraken2 database.
|
||||
pattern: "*{fastq.gz}"
|
||||
- unclassified:
|
||||
type: file
|
||||
description: |
|
||||
Reads not classified to belong to any of the taxa
|
||||
on the Kraken2 database.
|
||||
pattern: "*{fastq.gz}"
|
||||
- txt:
|
||||
type: file
|
||||
description: |
|
||||
Kraken2 report containing stats about classified
|
||||
and not classified reads.
|
||||
pattern: "*.{report.txt}"
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
authors:
|
||||
- "@joseespinosa"
|
||||
- "@drpatelh"
|
49
modules/nf-core/modules/malt/run/main.nf
generated
Normal file
49
modules/nf-core/modules/malt/run/main.nf
generated
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Runs malt-run on a set of FASTQ files against one index directory,
// captures the verbose output in malt-run.log and records the tool version.
process MALT_RUN {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::malt=0.53" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
        'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"

    input:
    tuple val(meta), path(fastqs)
    val mode
    path index

    output:
    tuple val(meta), path("*.rma6")                          , emit: rma6
    tuple val(meta), path("*.{tab,text,sam}"), optional:true, emit: alignments
    tuple val(meta), path("*.log")                           , emit: log
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def extra_args = task.ext.args ?: ''
    // Value handed to malt-run's -J-Xmx option (in GB): the task's memory
    // request when one is set, otherwise a fixed fallback of 6.
    def heap_gb = task.memory ? task.memory.giga : 6
    if (!task.memory) {
        log.info '[MALT_RUN] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
    }

    """
    malt-run \\
        -J-Xmx${heap_gb}g \\
        -t $task.cpus \\
        -v \\
        -o . \\
        $extra_args \\
        --inFile ${fastqs.join(' ')} \\
        -m $mode \\
        --index $index/ |&tee malt-run.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        malt: \$(malt-run --help 2>&1 | grep -o 'version.* ' | cut -f 1 -d ',' | cut -f2 -d ' ')
    END_VERSIONS
    """
}
|
58
modules/nf-core/modules/malt/run/meta.yml
generated
Normal file
58
modules/nf-core/modules/malt/run/meta.yml
generated
Normal file
|
@ -0,0 +1,58 @@
|
|||
name: malt_run
|
||||
description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics.
|
||||
keywords:
|
||||
- malt
|
||||
- alignment
|
||||
- metagenomics
|
||||
- ancient DNA
|
||||
- aDNA
|
||||
- palaeogenomics
|
||||
- archaeogenomics
|
||||
- microbiome
|
||||
tools:
|
||||
- malt:
|
||||
description: A tool for mapping metagenomic data
|
||||
homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/
|
||||
documentation: https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf
|
||||
tool_dev_url: None
|
||||
doi: "10.1038/s41559-017-0446-6"
|
||||
licence: ["GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- fastqs:
|
||||
type: file
|
||||
description: Input FASTQ files
|
||||
pattern: "*.{fastq.gz,fq.gz}"
|
||||
- mode:
|
||||
type: string
|
||||
description: Program mode
|
||||
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
|
||||
- index:
|
||||
type: directory
|
||||
description: Index/database directory from malt-build
|
||||
pattern: "*/"
|
||||
output:
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- rma6:
|
||||
type: file
|
||||
description: MEGAN6 RMA6 file
|
||||
pattern: "*.rma6"
|
||||
- sam:
|
||||
type: file
|
||||
description: Alignment files in Tab, Text or MEGAN-compatible SAM format
|
||||
pattern: "*.{tab,txt,sam}"
|
||||
- log:
|
||||
type: file
|
||||
description: Log of verbose MALT stdout
|
||||
pattern: "malt-run.log"
|
||||
|
||||
authors:
|
||||
- "@jfy133"
|
|
@ -54,8 +54,15 @@ params {
|
|||
databases = null
|
||||
|
||||
// FASTQ preprocessing
|
||||
fastp_clip_merge = false
|
||||
fastp_exclude_unmerged = true
|
||||
fastp_clip_merge = false
|
||||
fastp_exclude_unmerged = true
|
||||
|
||||
// MALT
|
||||
run_malt = false
|
||||
malt_mode = 'BlastN'
|
||||
|
||||
// kraken2
|
||||
run_kraken2 = false
|
||||
}
|
||||
|
||||
// Load base.config by default for all pipelines
|
||||
|
|
|
@ -56,6 +56,9 @@ include { MULTIQC } from '../modules/nf-core/modules/multiqc
|
|||
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
|
||||
|
||||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||
|
||||
|
||||
/*
|
||||
========================================================================================
|
||||
|
@ -95,13 +98,15 @@ workflow TAXPROFILER {
|
|||
)
|
||||
|
||||
//
|
||||
// MODULE: Run Clip/Merge/Complexity
|
||||
// PERFORM PREPROCESSING
|
||||
//
|
||||
if ( params.fastp_clip_merge ) {
|
||||
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq )
|
||||
}
|
||||
|
||||
// MODULE: Cat merge runs of same sample
|
||||
//
|
||||
// PERFORM RUN MERGING
|
||||
//
|
||||
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads
|
||||
.dump(tag: "prep_for_combine_grouping")
|
||||
.map {
|
||||
|
@ -118,15 +123,61 @@ workflow TAXPROFILER {
|
|||
|
||||
CAT_FASTQ ( ch_processed_for_combine.combine )
|
||||
|
||||
// Ready for profiling!
|
||||
ch_reads_for_profiling = ch_processed_for_combine.skip
|
||||
.dump(tag: "skip_combine")
|
||||
.mix( CAT_FASTQ.out.reads )
|
||||
.dump(tag: "files_for_profiling")
|
||||
|
||||
// Combine reads with possible databases
|
||||
//
|
||||
// COMBINE READS WITH POSSIBLE DATABASES
|
||||
//
|
||||
|
||||
// output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
||||
ch_input_for_profiling = ch_reads_for_profiling
|
||||
.combine(DB_CHECK.out.dbs)
|
||||
.dump(tag: "reads_plus_db")
|
||||
.branch {
|
||||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
unknown: true
|
||||
}
|
||||
|
||||
//
|
||||
// PREP PROFILER INPUT CHANNELS ON PER TOOL BASIS
|
||||
//
|
||||
|
||||
// We groupTuple to have all samples in one channel for MALT as database
|
||||
// loading takes a long time, so we only want to run it once per database
|
||||
ch_input_for_malt = ch_input_for_profiling.malt
|
||||
.map {
|
||||
it ->
|
||||
def temp_meta = [ id: it[2]['db_name']] + it[2]
|
||||
def db = it[3]
|
||||
[ temp_meta, it[1], db ]
|
||||
}
|
||||
.groupTuple(by: [0,2])
|
||||
.dump(tag: "input for malt")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0], it[1].flatten() ]
|
||||
db: it[2]
|
||||
}
|
||||
|
||||
// We can run Kraken2 one-by-one sample-wise
|
||||
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
||||
.dump(tag: "input for kraken")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [ it[0] + it[2], it[1] ]
|
||||
db: it[3]
|
||||
}
|
||||
|
||||
//
|
||||
// RUN PROFILING
|
||||
//
|
||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
|
||||
ch_reads_for_profiling.combine(DB_CHECK.out.dbs).dump(tag: "reads_plus_db")
|
||||
|
||||
//
|
||||
// MODULE: MultiQC
|
||||
|
@ -143,6 +194,14 @@ workflow TAXPROFILER {
|
|||
if (params.fastp_clip_merge) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc)
|
||||
}
|
||||
if (params.run_kraken2) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
|
||||
ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())
|
||||
}
|
||||
if (params.run_malt) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(MALT_RUN.out.log.collect{it[1]}.ifEmpty([]))
|
||||
ch_versions = ch_versions.mix(MALT_RUN.out.versions.first())
|
||||
}
|
||||
|
||||
|
||||
MULTIQC (
|
||||
|
|
Loading…
Reference in a new issue