1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 23:53:10 +00:00

Merge pull request #82 from jianhong/motus

add motus profile.
This commit is contained in:
James A. Fellows Yates 2022-07-04 14:16:12 +02:00 committed by GitHub
commit 1eba859bf8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 258 additions and 3 deletions

View file

@ -71,3 +71,54 @@ jobs:
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
motus:
name: Test mOTUs with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
env:
NXF_VER: ${{ matrix.nxf_ver }}
NXF_ANSI_LOG: false
strategy:
matrix:
# Nextflow versions
include:
# Test pipeline minimum Nextflow version
- NXF_VER: "21.10.3"
NXF_EDGE: ""
# Test latest edge release of Nextflow
- NXF_VER: ""
NXF_EDGE: "1"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
- name: Install Nextflow
env:
NXF_VER: ${{ matrix.NXF_VER }}
# Uncomment only if the edge release is more recent than the latest stable release
# See https://github.com/nextflow-io/nextflow/issues/2467
# NXF_EDGE: ${{ matrix.NXF_EDGE }}
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: Show current locale
run: locale
- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Prepare the database
run: |
wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
python downloadDB.py > download_db_log.txt
echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
- name: Run pipeline with test data
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv

View file

@ -365,6 +365,13 @@ process {
]
}
withName: MOTUS_PROFILE {
publishDir = [
path: { "${params.outdir}/motus/${meta.db_name}" },
mode: params.publish_dir_mode
]
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },

View file

@ -37,6 +37,7 @@ params {
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
malt_save_reads = true
kraken2_save_reads = true

41
conf/test_motus.config Normal file
View file

@ -0,0 +1,41 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/taxprofiler -profile test,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/
params {
config_profile_name = 'mOTUs Test profile'
config_profile_description = 'Minimal test to check mOTUs function'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'database_motus.csv'
perform_shortread_clipmerge = false
perform_longread_clip = false
perform_shortread_complexityfilter = false
perform_shortread_hostremoval = false
perform_longread_hostremoval = false
perform_runmerging = false
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = false
run_kraken2 = false
run_malt = false
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_motus = true
}

View file

@ -37,6 +37,7 @@ params {
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
}

View file

@ -37,6 +37,7 @@ params {
run_metaphlan3 = false
run_centrifuge = false
run_diamond = false
run_motus = false
}
process {

View file

@ -79,6 +79,7 @@ kraken2,db1,,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
motus,db_mOTU,,/<path>/<to>/motus/motus_database/
```
Column specifications are as follows:
@ -133,6 +134,13 @@ Expected (uncompressed) database files for each tool are as follows:
- **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files
to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named:
- `<database_name>.dmnd`
- **mOTUs** is composed of code and database together. The mOTUs tools
[`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py)
is used to prepare the mOTUs database and create a file with the version information.
The database download step can be time consuming and the database will be consisting
with same release version of the mOTUs tools. The database for same version tools
can be thus reused for multiple runs. Users can download the database once using the script above and
specify the path the database to the TSV table provided to `--databases`.
## Running the pipeline

View file

@ -72,6 +72,9 @@
"minimap2/index": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"motus/profile": {
"git_sha": "6b960f0e75bbb4d5bd301cd3875fa078d0eab4d1"
},
"multiqc": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},

View file

@ -0,0 +1,54 @@
process MOTUS_PROFILE {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::motus=3.0.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/motus:3.0.1--pyhdfd78af_0':
'quay.io/biocontainers/motus:3.0.1--pyhdfd78af_0' }"
input:
tuple val(meta), path(reads)
path db
output:
tuple val(meta), path("*.out"), emit: out
tuple val(meta), path("*.bam"), optional: true, emit: bam
tuple val(meta), path("*.mgc"), optional: true, emit: mgc
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def inputs = reads[0].getExtension() == 'bam' ?
"-i ${reads}" :
reads[0].getExtension() == 'mgc' ? "-m $reads" :
meta.single_end ?
"-s $reads" : "-f ${reads[0]} -r ${reads[1]}"
def refdb = db ? "-db ${db}" : ""
"""
motus profile \\
$args \\
$inputs \\
$refdb \\
-t $task.cpus \\
-n $prefix \\
-o ${prefix}.out
## mOTUs version number is not available from command line.
## mOTUs save the version number in index database folder.
## mOTUs will check the database version is same version as exec version.
if [ "$db" == "" ]; then
VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
else
VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
mOTUs: \$VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,61 @@
name: "motus_profile"
description: Taxonomic meta-omics profiling using universal marker genes
keywords:
- classify
- metagenomics
- fastq
- taxonomic profiling
tools:
- "motus":
description: "Marker gene-based OTU (mOTU) profiling"
homepage: "https://motu-tool.org/"
documentation: "https://github.com/motu-tool/mOTUs/wiki"
tool_dev_url: "https://github.com/motu-tool/mOTUs"
doi: "10.1038/s41467-019-08844-4"
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
respectively.
Or the intermediate bam file mapped by bwa to the mOTUs database or
the output bam file from motus profile.
Or the intermediate mgc read counts table.
pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,.bam,.mgc}"
- db:
type: directory
description: |
mOTUs database downloaded by `motus downloadDB`
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- out:
type: file
description: Results with taxonomic classification of each read
pattern: "*.out"
- bam:
type: file
description: Optional intermediate sorted BAM file from BWA
pattern: "*.{bam}"
- mgc:
type: file
description: Optional intermediate mgc read count table file saved with `-M`.
pattern: "*.{mgc}"
authors:
- "@jianhong"

View file

@ -126,6 +126,9 @@ params {
diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently
diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated!
// mOTUs
run_motus = false
// krona
run_krona = false
}
@ -200,6 +203,7 @@ profiles {
test_full { includeConfig 'conf/test_full.config' }
test_noprofiling { includeConfig 'conf/test_noprofiling.config' }
test_nopreprocessing { includeConfig 'conf/test_nopreprocessing.config' }
test_motus { includeConfig 'conf/test_motus.config' }
}
// Load igenomes.config if required

View file

@ -421,6 +421,9 @@
"type": "integer",
"default": 500000000
},
"run_motus": {
"type": "boolean"
},
"malt_save_reads": {
"type": "boolean"
},

View file

@ -11,7 +11,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta
include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main'
include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main'
include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main'
include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main'
workflow PROFILING {
take:
@ -45,6 +45,7 @@ workflow PROFILING {
centrifuge: it[2]['tool'] == 'centrifuge'
kaiju: it[2]['tool'] == 'kaiju'
diamond: it[2]['tool'] == 'diamond'
motus: it[2]['tool'] == 'motus'
unknown: true
}
@ -210,6 +211,25 @@ workflow PROFILING {
}
if ( params.run_motus ) {
ch_input_for_motus = ch_input_for_profiling.motus
.filter{
if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] mOTUs currently does not accept FASTA files as input. Skipping mOTUs for sample ${it[0].id}."
!it[0].is_fasta
}
.multiMap {
it ->
reads: [it[0] + it[2], it[1]]
db: it[3]
}
MOTUS_PROFILE ( ch_input_for_motus.reads, ch_input_for_motus.db )
ch_versions = ch_versions.mix( MOTUS_PROFILE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MOTUS_PROFILE.out.out )
}
emit:
classifications = ch_raw_classifications
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom