Move MetaPhlAn3 to a subcommand and add mergemetaphlantables (#2026)

* Move MetaPhlAn3 to a subcommand and add mergemetaphlantables

* Add mergemetaphlantables tests

* Add mergemetaphlantables to test config

* Apply suggestions from code review

* Revert now unnecessary input channel name change and now document

* Update modules/metaphlan3/mergemetaphlantables/main.nf

* Require database directory and more details in description
This commit is contained in:
James A. Fellows Yates 2022-09-07 21:58:03 +02:00 committed by GitHub
parent 3ef618fed2
commit 940d7fe9d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 135 additions and 11 deletions

View file

@ -0,0 +1,33 @@
// Merges the supplied MetaPhlAn3 profile files into one table by running
// merge_metaphlan_tables.py, and records the MetaPhlAn version used.
//
// Input:
//   profiles - the profile file(s) passed as positional arguments to the script
// Output:
//   txt      - the merged table, written to "<prefix>.txt"
//   versions - versions.yml with the reported metaphlan version
process METAPHLAN3_MERGEMETAPHLANTABLES {
    label 'process_single'

    conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
        'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"

    input:
    path(profiles)

    output:
    // A single file is emitted, so a plain `path` output is used instead of
    // a one-element `tuple`.
    path("${prefix}.txt"), emit: txt
    path "versions.yml"  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // No `def` here: the output declaration above also interpolates `prefix`.
    prefix = task.ext.prefix ?: "merged_abundance_table"
    """
    merge_metaphlan_tables.py \\
        $args \\
        -o ${prefix}.txt \\
        ${profiles}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,34 @@
# Module metadata for METAPHLAN3_MERGEMETAPHLANTABLES.
name: "metaphlan3_mergemetaphlantables"
description: Merges output abundance tables from MetaPhlAn3
keywords:
  - metagenomics
  - classification
  - merge
  - table
  - profiles
tools:
  - metaphlan3:
      description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
      homepage: https://huttenhower.sph.harvard.edu/metaphlan/
      documentation: https://github.com/biobakery/MetaPhlAn
      doi: "10.7554/eLife.65088"
      licence: ["MIT License"]

input:
  - profiles:
      type: file
      description: List of per-sample MetaPhlAn3 taxonomic abundance tables
      pattern: "*"

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The merged table is a regular file; `txt` is its extension, not a type.
  - txt:
      type: file
      description: Combined MetaPhlAn3 table
      pattern: "*.txt"

authors:
  - "@jfy133"

View file

@ -28,15 +28,18 @@ process METAPHLAN3 {
def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt" def bowtie2_out = "$input_type" == "--input_type bowtie2out" || "$input_type" == "--input_type sam" ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
""" """
BT2_DB=`find -L "${metaphlan_db}" -name "*rev.1.bt2" -exec dirname {} \\;`
metaphlan \\ metaphlan \\
--nproc $task.cpus \\ --nproc $task.cpus \\
$input_type \\ $input_type \\
$input_data \\ $input_data \\
$args \\ $args \\
$bowtie2_out \\ $bowtie2_out \\
--bowtie2db ${metaphlan_db} \\ --bowtie2db \$BT2_DB \\
--biom ${prefix}.biom \\ --biom ${prefix}.biom \\
--output_file ${prefix}_profile.txt --output_file ${prefix}_profile.txt
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}') metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')

View file

@ -24,6 +24,12 @@ input:
type: file type: file
description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out) description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}" pattern: "*.{fastq.gz, fasta, fasta.gz, sam, bowtie2out.txt}"
# The database is supplied as a directory (see the pattern), not a single file.
- metaphlan_db:
    type: directory
    description: |
      Directory containing pre-downloaded and uncompressed MetaPhlAn3 database downloaded from: http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/.
      Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_v31_CHOCOPhlAn_201901') in your module.conf ext.args for METAPHLAN3!
    pattern: "*/"
output: output:
- meta: - meta:

View file

@ -1534,9 +1534,13 @@ metabat2/metabat2:
- modules/metabat2/metabat2/** - modules/metabat2/metabat2/**
- tests/modules/metabat2/metabat2/** - tests/modules/metabat2/metabat2/**
metaphlan3: metaphlan3/metaphlan3:
- modules/metaphlan3/** - modules/metaphlan3/metaphlan3/**
- tests/modules/metaphlan3/** - tests/modules/metaphlan3/metaphlan3/**
metaphlan3/mergemetaphlantables:
- modules/metaphlan3/mergemetaphlantables/**
- tests/modules/metaphlan3/mergemetaphlantables/**
methyldackel/extract: methyldackel/extract:
- modules/methyldackel/extract/** - modules/methyldackel/extract/**

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { UNTAR } from '../../../../modules/untar/main.nf'
include { METAPHLAN3 } from '../../../../modules/metaphlan3/metaphlan3/main.nf'
include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../../../modules/metaphlan3/mergemetaphlantables/main.nf'
// End-to-end test: profile two samples with METAPHLAN3 against an unpacked
// database archive, then merge the resulting profiles into one table.
workflow test_metaphlan3_mergemetaphlantables {

    // Two single-end samples (ids 'test' and 'test2') that use the same FASTQ file.
    ch_reads = Channel.of(
        [
            [ id:'test', single_end:true ],
            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
        ],
        [
            [ id:'test2', single_end:true ],
            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
        ]
    )

    // Database tarball, paired with an empty first element, as input for UNTAR.
    db_archive = [
        [],
        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/metaphlan_database.tar.gz', checkIfExists: true)
    ]

    UNTAR ( db_archive )

    // Pass on only the second element of each UNTAR output item.
    ch_db = UNTAR.out.untar.map { entry -> entry[1] }

    METAPHLAN3 ( ch_reads, ch_db )

    // Take the second element of each profile item and gather them all into
    // a single emission for the merge step.
    ch_profiles = METAPHLAN3.out.profile
        .map { entry -> entry[1] }
        .collect()

    METAPHLAN3_MERGEMETAPHLANTABLES ( ch_profiles )
}

View file

@ -2,10 +2,6 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: SAMTOOLS_VIEW {
ext.prefix = { "${meta.id}.sam" }
}
withName: METAPHLAN3 { withName: METAPHLAN3 {
ext.args = '--index mpa_v30_CHOCOPhlAn_201901 --add_viruses --bt2_ps very-sensitive-local' ext.args = '--index mpa_v30_CHOCOPhlAn_201901 --add_viruses --bt2_ps very-sensitive-local'
} }

View file

@ -0,0 +1,8 @@
# Test case for the mergemetaphlantables module: runs the
# test_metaphlan3_mergemetaphlantables workflow entry with the shared and
# module-specific test configs, then checks that the listed output file
# contains the string given under `contains`.
- name: metaphlan3 mergemetaphlantables test_metaphlan3_mergemetaphlantables
command: nextflow run ./tests/modules/metaphlan3/mergemetaphlantables -entry test_metaphlan3_mergemetaphlantables -c ./tests/config/nextflow.config -c ./tests/modules/metaphlan3/mergemetaphlantables/nextflow.config
# Tags attached to this test case.
tags:
- metaphlan3
- metaphlan3/mergemetaphlantables
# Output files to check after the run.
files:
- path: output/metaphlan3/merged_abundance_table.txt
contains: ["clade_name NCBI_tax_id "]

View file

@ -2,9 +2,8 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { UNTAR } from '../../../modules/untar/main.nf' include { UNTAR } from '../../../../modules/untar/main.nf'
include { SAMTOOLS_VIEW } from '../../../modules/samtools/view/main.nf' include { METAPHLAN3 } from '../../../../modules/metaphlan3/metaphlan3/main.nf'
include { METAPHLAN3 } from '../../../modules/metaphlan3/main.nf'
workflow test_metaphlan3_single_end { workflow test_metaphlan3_single_end {
@ -18,6 +17,16 @@ workflow test_metaphlan3_single_end {
METAPHLAN3 ( input, UNTAR.out.untar.map{ it[1] } ) METAPHLAN3 ( input, UNTAR.out.untar.map{ it[1] } )
} }
// Runs METAPHLAN3 on one single-end sample with an empty database input.
workflow test_metaphlan3_single_end_nodb {

    input = [ [ id:'test', single_end:true ], // meta map
              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
            ]

    // No database archive is unpacked here: this workflow defines no `db`
    // variable and passes an empty database input, so the previous
    // `UNTAR ( db )` call referenced an undefined name and its output was unused.
    // NOTE(review): the METAPHLAN3 script searches `metaphlan_db` for the
    // Bowtie2 index - confirm that an empty database input is still supported.
    METAPHLAN3 ( input, [] )
}
workflow test_metaphlan3_paired_end { workflow test_metaphlan3_paired_end {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map

View file

@ -0,0 +1,9 @@
// Process-scope settings for the mergemetaphlantables module test.
process {
// Publish results under params.outdir, in a directory named after the
// lower-cased first '_'-separated token of the last ':'-separated segment
// of the process name.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Extra command-line arguments supplied to the METAPHLAN3 process via ext.args.
withName: METAPHLAN3 {
ext.args = '--index mpa_v30_CHOCOPhlAn_201901 --add_viruses --bt2_ps very-sensitive-local'
}
}