mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 07:09:56 +00:00
commit
fe628b3578
13 changed files with 263 additions and 46 deletions
8
.github/workflows/ci.yml
vendored
8
.github/workflows/ci.yml
vendored
|
@ -46,6 +46,14 @@ jobs:
|
|||
wget -qO- get.nextflow.io | bash
|
||||
sudo mv nextflow /usr/local/bin/
|
||||
|
||||
- name: Show current locale
|
||||
run: locale
|
||||
|
||||
- name: Set UTF-8 enabled locale
|
||||
run: |
|
||||
sudo locale-gen en_US.UTF-8
|
||||
sudo update-locale LANG=en_US.UTF-8
|
||||
|
||||
- name: Run pipeline with test data
|
||||
# TODO nf-core: You can customise CI pipeline run tests as required
|
||||
# For example: adding multiple test runs with different parameters
|
||||
|
|
|
@ -34,6 +34,10 @@
|
|||
|
||||
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||
|
||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||
|
||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. eLife 10 (May): e65088. doi: 10.7554/eLife.65088.
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
- [Anaconda](https://anaconda.com)
|
||||
|
|
|
@ -170,6 +170,15 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: METAPHLAN3 {
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{biom,txt}'
|
||||
]
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/pipeline_info" },
|
||||
|
|
|
@ -26,6 +26,7 @@ params {
|
|||
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
|
||||
run_kraken2 = true
|
||||
run_malt = true
|
||||
run_metaphlan3 = true
|
||||
shortread_clipmerge = true
|
||||
|
||||
}
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
"malt/run": {
|
||||
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
||||
},
|
||||
"metaphlan3": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
"multiqc": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
|
|
31
modules/local/ensure_fastq_extension.nf
Normal file
31
modules/local/ensure_fastq_extension.nf
Normal file
|
@ -0,0 +1,31 @@
|
|||
process ENSURE_FASTQ_EXTENSION {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
|
||||
'biocontainers/biocontainers:v1.2.0_cv2' }"
|
||||
|
||||
|
||||
input:
|
||||
tuple val(meta), path(reads)
|
||||
|
||||
output:
|
||||
tuple val(meta), path('*.fastq.gz'), emit: reads
|
||||
|
||||
script:
|
||||
if (meta.single_end) {
|
||||
fastq = "${reads.baseName}.fastq.gz"
|
||||
"""
|
||||
ln -s '${reads}' '${fastq}'
|
||||
"""
|
||||
} else {
|
||||
first = "${reads[0].baseName}.fastq.gz"
|
||||
second = "${reads[1].baseName}.fastq.gz"
|
||||
"""
|
||||
ln -s '${reads[0]}' '${first}'
|
||||
ln -s '${reads[1]}' '${second}'
|
||||
"""
|
||||
}
|
||||
}
|
45
modules/nf-core/modules/metaphlan3/main.nf
generated
Normal file
45
modules/nf-core/modules/metaphlan3/main.nf
generated
Normal file
|
@ -0,0 +1,45 @@
|
|||
process METAPHLAN3 {
|
||||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
|
||||
'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"
|
||||
|
||||
input:
|
||||
tuple val(meta), path(input)
|
||||
path metaphlan_db
|
||||
|
||||
output:
|
||||
tuple val(meta), path("*_profile.txt") , emit: profile
|
||||
tuple val(meta), path("*.biom") , emit: biom
|
||||
tuple val(meta), path('*.bowtie2out.txt'), optional:true, emit: bt2out
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_type = ("$input".endsWith(".fastq.gz")) ? "--input_type fastq" : ("$input".contains(".fasta")) ? "--input_type fasta" : ("$input".endsWith(".bowtie2out.txt")) ? "--input_type bowtie2out" : "--input_type sam"
|
||||
def input_data = ("$input_type".contains("fastq")) && !meta.single_end ? "${input[0]},${input[1]}" : "$input"
|
||||
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
|
||||
|
||||
"""
|
||||
metaphlan \\
|
||||
--nproc $task.cpus \\
|
||||
$input_type \\
|
||||
$input_data \\
|
||||
$args \\
|
||||
$bowtie2_out \\
|
||||
--bowtie2db ${metaphlan_db} \\
|
||||
--biom ${prefix}.biom \\
|
||||
--output_file ${prefix}_profile.txt
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
52
modules/nf-core/modules/metaphlan3/meta.yml
generated
Normal file
52
modules/nf-core/modules/metaphlan3/meta.yml
generated
Normal file
|
@ -0,0 +1,52 @@
|
|||
name: metaphlan3
|
||||
description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data.
|
||||
keywords:
|
||||
- metagenomics
|
||||
- classification
|
||||
- fastq
|
||||
- bam
|
||||
- fasta
|
||||
tools:
|
||||
- metaphlan3:
|
||||
description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
|
||||
homepage: https://huttenhower.sph.harvard.edu/metaphlan/
|
||||
documentation: https://github.com/biobakery/MetaPhlAn
|
||||
doi: "10.7554/eLife.65088"
|
||||
licence: ["MIT License"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- input:
|
||||
type: file
|
||||
description: MetaPhlAn 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPhlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
|
||||
pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- profile:
|
||||
type: file
|
||||
description: Tab-separated output file of the predicted taxon relative abundances
|
||||
pattern: "*.{txt}"
|
||||
- biom:
|
||||
type: file
|
||||
description: General-use format for representing biological sample by observation contingency tables
|
||||
pattern: "*.{biom}"
|
||||
- bowtie2out:
|
||||
type: file
|
||||
description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPhlAn marker database (not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3)
|
||||
pattern: "*.{bowtie2out.txt}"
|
||||
|
||||
authors:
|
||||
- "@MGordon09"
|
|
@ -71,6 +71,9 @@ params {
|
|||
|
||||
// kraken2
|
||||
run_kraken2 = false
|
||||
|
||||
// metaphlan3
|
||||
run_metaphlan3 = false
|
||||
}
|
||||
|
||||
// Load base.config by default for all pipelines
|
||||
|
@ -155,7 +158,7 @@ if (!params.igenomes_ignore) {
|
|||
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
|
||||
|
||||
env {
|
||||
PYTHONNOUSERSITE = 1
|
||||
PYTHONNOUSERSITE = '1'
|
||||
R_PROFILE_USER = "/.Rprofile"
|
||||
R_ENVIRON_USER = "/.Renviron"
|
||||
JULIA_DEPOT_PATH = "/usr/local/share/julia"
|
||||
|
|
|
@ -282,6 +282,10 @@
|
|||
"run_kraken2": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"run_metaphlan3": {
|
||||
"type": "boolean",
|
||||
"description": "Enable MetaPhlAn for taxonomic profiling"
|
||||
},
|
||||
"shortread_clipmerge_tool": {
|
||||
"type": "string",
|
||||
"default": "fastp",
|
||||
|
|
|
@ -31,9 +31,9 @@ workflow INPUT_CHECK {
|
|||
.set { fasta }
|
||||
|
||||
emit:
|
||||
fastq // channel: [ val(meta), [ reads ] ]
|
||||
nanopore // channel: [ val(meta), [ reads ] ]
|
||||
fasta // channel: [ val(meta), fasta ]
|
||||
fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ]
|
||||
nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ]
|
||||
fasta = fasta ?: [] // channel: [ val(meta), fasta ]
|
||||
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,11 @@ Process short raw reads with AdapterRemoval
|
|||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||
include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
|
||||
include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main'
|
||||
include {
|
||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1;
|
||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2;
|
||||
ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3;
|
||||
} from '../../modules/local/ensure_fastq_extension'
|
||||
|
||||
workflow SHORTREAD_ADAPTERREMOVAL {
|
||||
|
||||
|
@ -24,63 +29,101 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
|||
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
|
||||
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
|
||||
|
||||
// due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
|
||||
// has to be exported in a separate channel, and we must manually recombine when necessary
|
||||
/*
|
||||
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file
|
||||
* has to be exported in a separate channel and we must manually recombine when necessary.
|
||||
*/
|
||||
|
||||
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
|
||||
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
|
||||
.mix(
|
||||
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
|
||||
)
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new.single_end = true
|
||||
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
.groupTuple()
|
||||
ENSURE_FASTQ_EXTENSION1(
|
||||
Channel.empty().mix(
|
||||
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
||||
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.singles_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
|
||||
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
|
||||
)
|
||||
.map { meta, reads ->
|
||||
meta.single_end = true
|
||||
[meta, reads]
|
||||
}
|
||||
)
|
||||
|
||||
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
|
||||
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
|
||||
CAT_FASTQ(
|
||||
ENSURE_FASTQ_EXTENSION1.out.reads
|
||||
.groupTuple()
|
||||
)
|
||||
|
||||
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||
|
||||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
|
||||
|
||||
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
|
||||
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
|
||||
.mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = true
|
||||
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
.groupTuple(by: 0)
|
||||
ENSURE_FASTQ_EXTENSION1(
|
||||
Channel.empty().mix(
|
||||
ADAPTERREMOVAL_PAIRED.out.collapsed,
|
||||
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
|
||||
)
|
||||
.map { meta, reads ->
|
||||
meta.single_end = true
|
||||
[meta, reads]
|
||||
}
|
||||
)
|
||||
|
||||
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
|
||||
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
|
||||
CAT_FASTQ(
|
||||
ENSURE_FASTQ_EXTENSION1.out.reads
|
||||
.groupTuple()
|
||||
)
|
||||
|
||||
ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||
|
||||
ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
|
||||
.mix(ENSURE_FASTQ_EXTENSION2.out.reads)
|
||||
|
||||
} else {
|
||||
|
||||
ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
|
||||
.join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
|
||||
.groupTuple()
|
||||
.map { meta, pair1, pair2 ->
|
||||
[ meta, [ pair1, pair2 ].flatten() ]
|
||||
}
|
||||
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
|
||||
}
|
||||
ENSURE_FASTQ_EXTENSION1(
|
||||
ADAPTERREMOVAL_PAIRED.out.pair1_truncated
|
||||
.map { meta, reads ->
|
||||
meta.single_end = true
|
||||
[meta, reads]
|
||||
}
|
||||
)
|
||||
|
||||
ch_processed_reads = ch_adapterremoval_reads_prepped
|
||||
ENSURE_FASTQ_EXTENSION2(
|
||||
ADAPTERREMOVAL_PAIRED.out.pair2_truncated
|
||||
.map { meta, reads ->
|
||||
meta.single_end = true
|
||||
[meta, reads]
|
||||
}
|
||||
)
|
||||
|
||||
ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated)
|
||||
|
||||
ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads
|
||||
.join(ENSURE_FASTQ_EXTENSION2.out.reads)
|
||||
.groupTuple()
|
||||
.map { meta, pair1, pair2 ->
|
||||
meta.single_end = false
|
||||
[ meta, [ pair1, pair2 ].flatten() ]
|
||||
}
|
||||
.mix(ENSURE_FASTQ_EXTENSION3.out.reads)
|
||||
|
||||
}
|
||||
|
||||
ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
|
||||
ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )
|
||||
ch_multiqc_files = ch_multiqc_files.mix(
|
||||
ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
|
||||
ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
|
||||
)
|
||||
|
||||
emit:
|
||||
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ]
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
mqc = ch_multiqc_files
|
||||
}
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/
|
|||
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
|
||||
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
|
||||
|
||||
include { METAPHLAN3 } from '../modules/nf-core/modules/metaphlan3/main'
|
||||
|
||||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -130,6 +130,7 @@ workflow TAXPROFILER {
|
|||
.branch {
|
||||
malt: it[2]['tool'] == 'malt'
|
||||
kraken2: it[2]['tool'] == 'kraken2'
|
||||
metaphlan3: it[2]['tool'] == 'metaphlan3'
|
||||
unknown: true
|
||||
}
|
||||
|
||||
|
@ -163,6 +164,14 @@ workflow TAXPROFILER {
|
|||
db: it[3]
|
||||
}
|
||||
|
||||
ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
|
||||
.dump(tag: "input_metaphlan3")
|
||||
.multiMap {
|
||||
it ->
|
||||
reads: [it[0] + it[2], it[1]]
|
||||
db: it[3]
|
||||
}
|
||||
|
||||
/*
|
||||
MODULE: RUN PROFILING
|
||||
*/
|
||||
|
@ -174,6 +183,10 @@ workflow TAXPROFILER {
|
|||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
}
|
||||
|
||||
if ( params.run_metaphlan3 ) {
|
||||
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||
}
|
||||
|
||||
/*
|
||||
MODULE: MultiQC
|
||||
*/
|
||||
|
@ -204,6 +217,7 @@ workflow TAXPROFILER {
|
|||
|
||||
// TODO MALT results overwriting per database?
|
||||
// TODO Versions for Kraken/MALT not reported?
|
||||
// TODO create multiQC module for metaphlan
|
||||
MULTIQC (
|
||||
ch_multiqc_files.collect()
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue