mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 01:16:03 +00:00
Add Kraken2 and MALT/run as Proof of Concept (currently MQC issue)
This commit is contained in:
parent
278f5605ca
commit
2c183ed2ed
8 changed files with 315 additions and 7 deletions
|
@ -68,6 +68,26 @@ process {
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
withName: MALT_RUN {
    // Only execute the module when MALT profiling has been switched on.
    ext.when = params.run_malt

    // Pass through the database-specific parameters carried in the meta map.
    ext.args = { "${meta.db_params}" }

    // Copy MALT result files into one output directory per database.
    publishDir = [
        mode: 'copy',
        pattern: '*.{rma6,tab,text,sam,log}',
        path: { "${params.outdir}/malt/${meta.db_name}" }
    ]
}
|
||||||
|
|
||||||
|
withName: KRAKEN2_KRAKEN2 {
    // Copy Kraken2 result files into one output directory per database.
    publishDir = [
        path: { "${params.outdir}/kraken2/${meta.db_name}" },
        mode: 'copy',
        // The leading '*' is required: without it the glob only matches files
        // literally named '.fastq.gz' or '.txt', so no output would be published.
        pattern: '*.{fastq.gz,txt}'
    ]

    // Pass through the database-specific parameters carried in the meta map.
    ext.args = { "${meta.db_params}" }

    // Only execute the module when Kraken2 profiling has been switched on.
    ext.when = params.run_kraken2

    // Output prefix combines the sample id and the database name.
    ext.prefix = { "${meta.id}-${meta.db_name}" }
}
|
||||||
|
|
||||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||||
publishDir = [
|
publishDir = [
|
||||||
|
|
|
@ -15,6 +15,12 @@
|
||||||
"fastqc": {
|
"fastqc": {
|
||||||
"git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961"
|
"git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961"
|
||||||
},
|
},
|
||||||
|
"kraken2/kraken2": {
|
||||||
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
|
},
|
||||||
|
"malt/run": {
|
||||||
|
"git_sha": "76cdd46f3f8a77fb5023fb5a39c4ab99925b8b56"
|
||||||
|
},
|
||||||
"multiqc": {
|
"multiqc": {
|
||||||
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
|
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
|
||||||
}
|
}
|
||||||
|
|
49
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
Normal file
49
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
// Runs kraken2 on the reads of one sample against one database, then gzips the
// FASTQ files it wrote and records the kraken2 and pigz versions.
process KRAKEN2_KRAKEN2 {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
        'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"

    input:
    tuple val(meta), path(reads)
    path db

    output:
    tuple val(meta), path('*classified*')  , emit: classified
    tuple val(meta), path('*unclassified*'), emit: unclassified
    tuple val(meta), path('*report.txt')   , emit: txt
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def extra_args = task.ext.args ?: ''
    def out_prefix = task.ext.prefix ?: "${meta.id}"

    // Single-end runs get plain output names and no pairing flag. Otherwise the
    // '--paired' flag is added and a '#' is placed in the output names
    // (presumably kraken2's mate-number placeholder - see the Kraken2 manual).
    def pairing_flag = ''
    def mate_marker  = ''
    if (!meta.single_end) {
        pairing_flag = '--paired'
        mate_marker  = '#'
    }
    def classified_out   = "${out_prefix}.classified${mate_marker}.fastq"
    def unclassified_out = "${out_prefix}.unclassified${mate_marker}.fastq"
    """
    kraken2 \\
        --db $db \\
        --threads $task.cpus \\
        --unclassified-out $unclassified_out \\
        --classified-out $classified_out \\
        --report ${out_prefix}.kraken2.report.txt \\
        --gzip-compressed \\
        $pairing_flag \\
        $extra_args \\
        $reads

    pigz -p $task.cpus *.fastq

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
}
|
60
modules/nf-core/modules/kraken2/kraken2/meta.yml
generated
Normal file
60
modules/nf-core/modules/kraken2/kraken2/meta.yml
generated
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
name: kraken2_kraken2
|
||||||
|
description: Classifies metagenomic sequence data
|
||||||
|
keywords:
|
||||||
|
- classify
|
||||||
|
- metagenomics
|
||||||
|
- fastq
|
||||||
|
- db
|
||||||
|
tools:
|
||||||
|
- kraken2:
|
||||||
|
description: |
|
||||||
|
Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads
|
||||||
|
homepage: https://ccb.jhu.edu/software/kraken2/
|
||||||
|
documentation: https://github.com/DerrickWood/kraken2/wiki/Manual
|
||||||
|
doi: 10.1186/s13059-019-1891-0
|
||||||
|
licence: ["MIT"]
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||||
|
respectively.
|
||||||
|
- db:
|
||||||
|
type: directory
|
||||||
|
description: Kraken2 database
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- classified:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Reads classified to belong to any of the taxa
|
||||||
|
on the Kraken2 database.
|
||||||
|
pattern: "*{fastq.gz}"
|
||||||
|
- unclassified:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Reads not classified to belong to any of the taxa
|
||||||
|
on the Kraken2 database.
|
||||||
|
pattern: "*{fastq.gz}"
|
||||||
|
- txt:
|
||||||
|
type: file
|
||||||
|
description: |
|
||||||
|
Kraken2 report containing stats about classified
|
||||||
|
and not classified reads.
|
||||||
|
pattern: "*.{report.txt}"
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
authors:
|
||||||
|
- "@joseespinosa"
|
||||||
|
- "@drpatelh"
|
49
modules/nf-core/modules/malt/run/main.nf
generated
Normal file
49
modules/nf-core/modules/malt/run/main.nf
generated
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
// Runs malt-run on the given FASTQ files against an index, captures the verbose
// console output in malt-run.log, and records the MALT version.
process MALT_RUN {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::malt=0.53" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
        'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"

    input:
    tuple val(meta), path(fastqs)
    val mode
    path index

    output:
    tuple val(meta), path("*.rma6")                          , emit: rma6
    tuple val(meta), path("*.{tab,text,sam}"), optional:true, emit: alignments
    tuple val(meta), path("*.log")                           , emit: log
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def extra_args = task.ext.args ?: ''

    // Java heap size in GB: use the task's memory request when one is set,
    // otherwise fall back to 6 and tell the user how to override it.
    def heap_gb
    if (task.memory) {
        heap_gb = task.memory.giga
    } else {
        log.info '[MALT_RUN] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
        heap_gb = 6
    }

    // All input files are handed to a single malt-run call, space separated.
    def input_files = fastqs.join(' ')
    """
    malt-run \\
        -J-Xmx${heap_gb}g \\
        -t $task.cpus \\
        -v \\
        -o . \\
        $extra_args \\
        --inFile ${input_files} \\
        -m $mode \\
        --index $index/ |&tee malt-run.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        malt: \$(malt-run --help 2>&1 | grep -o 'version.* ' | cut -f 1 -d ',' | cut -f2 -d ' ')
    END_VERSIONS
    """
}
|
58
modules/nf-core/modules/malt/run/meta.yml
generated
Normal file
58
modules/nf-core/modules/malt/run/meta.yml
generated
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
name: malt_run
|
||||||
|
description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics.
|
||||||
|
keywords:
|
||||||
|
- malt
|
||||||
|
- alignment
|
||||||
|
- metagenomics
|
||||||
|
- ancient DNA
|
||||||
|
- aDNA
|
||||||
|
- palaeogenomics
|
||||||
|
- archaeogenomics
|
||||||
|
- microbiome
|
||||||
|
tools:
|
||||||
|
- malt:
|
||||||
|
description: A tool for mapping metagenomic data
|
||||||
|
homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/
|
||||||
|
documentation: https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf
|
||||||
|
tool_dev_url: None
|
||||||
|
doi: "10.1038/s41559-017-0446-6"
|
||||||
|
licence: ["GPL v3"]
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- fastqs:
|
||||||
|
type: file
|
||||||
|
description: Input FASTQ files
|
||||||
|
pattern: "*.{fastq.gz,fq.gz}"
|
||||||
|
- mode:
|
||||||
|
type: string
|
||||||
|
description: Program mode
|
||||||
|
pattern: "Unknown|BlastN|BlastP|BlastX|Classifier"
|
||||||
|
- index:
|
||||||
|
type: directory
|
||||||
|
description: Index/database directory from malt-build
|
||||||
|
pattern: "*/"
|
||||||
|
output:
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- rma6:
|
||||||
|
type: file
|
||||||
|
description: MEGAN6 RMA6 file
|
||||||
|
pattern: "*.rma6"
|
||||||
|
- alignments:
|
||||||
|
type: file
|
||||||
|
description: Alignment files in Tab, Text or MEGAN-compatible SAM format
|
||||||
|
pattern: "*.{tab,text,sam}"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: Log of verbose MALT stdout
|
||||||
|
pattern: "malt-run.log"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@jfy133"
|
|
@ -56,6 +56,13 @@ params {
|
||||||
// FASTQ preprocessing
|
// FASTQ preprocessing
|
||||||
fastp_clip_merge = false
|
fastp_clip_merge = false
|
||||||
fastp_exclude_unmerged = true
|
fastp_exclude_unmerged = true
|
||||||
|
|
||||||
|
// MALT
|
||||||
|
run_malt = false
|
||||||
|
malt_mode = 'BlastN'
|
||||||
|
|
||||||
|
// kraken2
|
||||||
|
run_kraken2 = false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load base.config by default for all pipelines
|
// Load base.config by default for all pipelines
|
||||||
|
|
|
@ -56,6 +56,9 @@ include { MULTIQC } from '../modules/nf-core/modules/multiqc
|
||||||
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
|
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
|
||||||
|
|
||||||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||||
|
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||||
|
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
========================================================================================
|
========================================================================================
|
||||||
|
@ -95,13 +98,15 @@ workflow TAXPROFILER {
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
// MODULE: Run Clip/Merge/Complexity
|
// PERFORM PREPROCESSING
|
||||||
//
|
//
|
||||||
if ( params.fastp_clip_merge ) {
|
if ( params.fastp_clip_merge ) {
|
||||||
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq )
|
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq )
|
||||||
}
|
}
|
||||||
|
|
||||||
// MODULE: Cat merge runs of same sample
|
//
|
||||||
|
// PERFORM RUN MERGING
|
||||||
|
//
|
||||||
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads
|
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads
|
||||||
.dump(tag: "prep_for_combine_grouping")
|
.dump(tag: "prep_for_combine_grouping")
|
||||||
.map {
|
.map {
|
||||||
|
@ -118,15 +123,61 @@ workflow TAXPROFILER {
|
||||||
|
|
||||||
CAT_FASTQ ( ch_processed_for_combine.combine )
|
CAT_FASTQ ( ch_processed_for_combine.combine )
|
||||||
|
|
||||||
// Ready for profiling!
|
|
||||||
ch_reads_for_profiling = ch_processed_for_combine.skip
|
ch_reads_for_profiling = ch_processed_for_combine.skip
|
||||||
.dump(tag: "skip_combine")
|
.dump(tag: "skip_combine")
|
||||||
.mix( CAT_FASTQ.out.reads )
|
.mix( CAT_FASTQ.out.reads )
|
||||||
.dump(tag: "files_for_profiling")
|
.dump(tag: "files_for_profiling")
|
||||||
|
|
||||||
// Combine reads with possible databases
|
//
|
||||||
|
// COMBINE READS WITH POSSIBLE DATABASES
|
||||||
|
//
|
||||||
|
|
||||||
|
// output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
||||||
|
ch_input_for_profiling = ch_reads_for_profiling
|
||||||
|
.combine(DB_CHECK.out.dbs)
|
||||||
|
.dump(tag: "reads_plus_db")
|
||||||
|
.branch {
|
||||||
|
malt: it[2]['tool'] == 'malt'
|
||||||
|
kraken2: it[2]['tool'] == 'kraken2'
|
||||||
|
unknown: true
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// PREP PROFILER INPUT CHANNELS ON PER TOOL BASIS
|
||||||
|
//
|
||||||
|
|
||||||
|
// We groupTuple to have all samples in one channel for MALT as database
|
||||||
|
// loading takes a long time, so we only want to run it once per database
|
||||||
|
ch_input_for_malt = ch_input_for_profiling.malt
|
||||||
|
.map {
|
||||||
|
it ->
|
||||||
|
def temp_meta = [ id: it[2]['db_name']] + it[2]
|
||||||
|
def db = it[3]
|
||||||
|
[ temp_meta, it[1], db ]
|
||||||
|
}
|
||||||
|
.groupTuple(by: [0,2])
|
||||||
|
.dump(tag: "input for malt")
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [ it[0], it[1].flatten() ]
|
||||||
|
db: it[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can run Kraken2 one-by-one sample-wise
|
||||||
|
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
|
||||||
|
.dump(tag: "input for kraken")
|
||||||
|
.multiMap {
|
||||||
|
it ->
|
||||||
|
reads: [ it[0] + it[2], it[1] ]
|
||||||
|
db: it[3]
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// RUN PROFILING
|
||||||
|
//
|
||||||
|
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||||
|
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||||
|
|
||||||
ch_reads_for_profiling.combine(DB_CHECK.out.dbs).dump(tag: "reads_plus_db")
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// MODULE: MultiQC
|
// MODULE: MultiQC
|
||||||
|
@ -143,6 +194,14 @@ workflow TAXPROFILER {
|
||||||
if (params.fastp_clip_merge) {
|
if (params.fastp_clip_merge) {
|
||||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc)
|
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc)
|
||||||
}
|
}
|
||||||
|
if (params.run_kraken2) {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
|
||||||
|
ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())
|
||||||
|
}
|
||||||
|
if (params.run_malt) {
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix(MALT_RUN.out.log.collect{it[1]}.ifEmpty([]))
|
||||||
|
ch_versions = ch_versions.mix(MALT_RUN.out.versions.first())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
MULTIQC (
|
MULTIQC (
|
||||||
|
|
Loading…
Reference in a new issue