From 29c669766d472ff67337d6fb8a149735cabaac53 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Sun, 21 Nov 2021 05:17:25 -0700 Subject: [PATCH] add bakta module (#1085) * add bakta module * Update main.nf * Update main.nf Co-authored-by: Harshil Patel --- modules/bakta/functions.nf | 78 ++++++++++++++++ modules/bakta/main.nf | 77 ++++++++++++++++ modules/bakta/meta.yml | 85 ++++++++++++++++++ ...t_versions_yml.cpython-39-pytest-6.2.5.pyc | Bin 3558 -> 0 bytes tests/config/pytest_modules.yml | 4 + tests/modules/bakta/main.nf | 13 +++ tests/modules/bakta/test.yml | 25 ++++++ 7 files changed, 282 insertions(+) create mode 100644 modules/bakta/functions.nf create mode 100644 modules/bakta/main.nf create mode 100644 modules/bakta/meta.yml delete mode 100644 tests/__pycache__/test_versions_yml.cpython-39-pytest-6.2.5.pyc create mode 100644 tests/modules/bakta/main.nf create mode 100644 tests/modules/bakta/test.yml diff --git a/modules/bakta/functions.nf b/modules/bakta/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/bakta/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return 
options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/bakta/main.nf b/modules/bakta/main.nf new file mode 100644 index 00000000..2939f575 --- /dev/null +++ b/modules/bakta/main.nf @@ -0,0 +1,77 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process BAKTA { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::bakta=1.2.2" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/bakta:1.2.2--pyhdfd78af_0" + } else { + container "quay.io/biocontainers/bakta:1.2.2--pyhdfd78af_0" + } + + input: + tuple val(meta), path(fasta) + path db + path proteins + path prodigal_tf + + output: + tuple val(meta), path("${prefix}.embl") , emit: embl + tuple val(meta), path("${prefix}.faa") , emit: faa + tuple val(meta), path("${prefix}.ffn") , emit: ffn + tuple val(meta), path("${prefix}.fna") , emit: fna + tuple val(meta), path("${prefix}.gbff") , emit: gbff + tuple val(meta), path("${prefix}.gff3") , emit: gff + tuple val(meta), path("${prefix}.hypotheticals.tsv"), emit: hypotheticals_tsv + tuple val(meta), path("${prefix}.hypotheticals.faa"), emit: hypotheticals_faa + tuple val(meta), path("${prefix}.tsv") , emit: tsv + path "versions.yml" , emit: versions + + script: + prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" // empty string when no trusted-protein FASTA is supplied + def prodigal_opt = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : "" // empty string when no Prodigal training file is supplied + """ + bakta \\ + $options.args \\ + --threads $task.cpus \\ + --prefix ${prefix} \\ + --db $db \\ + $proteins_opt \\ + $prodigal_opt \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( echo \$(bakta --version 2>&1) | sed 's/^.*bakta //' ) + END_VERSIONS + """ + + stub: + prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + """ + touch ${prefix}.embl + touch ${prefix}.faa + touch ${prefix}.ffn + touch ${prefix}.fna + touch ${prefix}.gbff + touch ${prefix}.gff3 + touch ${prefix}.hypotheticals.tsv + touch ${prefix}.hypotheticals.faa + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( echo \$(bakta --version 2>&1) | sed 's/^.*bakta //' ) + END_VERSIONS + """ +} diff --git a/modules/bakta/meta.yml b/modules/bakta/meta.yml new file mode 100644 index 00000000..29e6edbe --- /dev/null +++ b/modules/bakta/meta.yml @@ -0,0 +1,85 @@ +name: bakta +description: Rapid annotation of bacterial genomes & plasmids. +keywords: + - annotation + - fasta + - prokaryote +tools: + - bakta: + description: Rapid & standardized annotation of bacterial genomes & plasmids. + homepage: https://github.com/oschwengers/bakta + documentation: https://github.com/oschwengers/bakta + tool_dev_url: https://github.com/oschwengers/bakta + doi: "10.1099/mgen.0.000685" + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: annotations as simple human readble tab separated values + pattern: "*.tsv" + - gff: + type: file + description: annotations & sequences in GFF3 format + pattern: "*.gff3" + - gbff: + type: file + description: annotations & sequences in (multi) GenBank format + pattern: "*.gbff" + - embl: + type: file + description: annotations & sequences in (multi) EMBL format + pattern: "*.embl" + - fna: + type: file + description: replicon/contig DNA sequences as FASTA + pattern: "*.fna" + - faa: + type: file + description: CDS/sORF amino acid sequences as FASTA + pattern: "*.faa" + - ffn: + type: file + description: feature nucleotide sequences as FASTA + pattern: "*.ffn" + - hypotheticals_tsv: + type: file + description: further information on hypothetical protein CDS as simple human readble tab separated values + pattern: "*.hypotheticals.tsv" + - hypotheticals_faa: + type: file + description: hypothetical protein CDS amino acid sequences as FASTA + pattern: "*.hypotheticals.faa" + +authors: + - "@rpetit3" diff --git a/tests/__pycache__/test_versions_yml.cpython-39-pytest-6.2.5.pyc b/tests/__pycache__/test_versions_yml.cpython-39-pytest-6.2.5.pyc deleted file mode 100644 index 33acb8369a1bc62b5e66e1ed80e2247dd0e2759f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3558 zcmaJ@TaVku73T0FQCdkWt=50fJ2%8xAU@;c_*j71tCg z&y04xD)|!Up)V=?+}g;h4+RSJp}(O&gXmj<0(mOXry?kto--7;v;&k9hjTgS%y%wx z=6LmbRfFfZUyVJ_*0jIjp!gUtco$v~KtnZ30msOtx zcHdT1m3~EitNki`jbLe9>(_wBq}G>Ozs~I4#_lq!Kh=Jt(Go3xsXZnACMEZ^j{Obn zsBLygYW~cVBlwmmqb!sIoX;N%I{4zG*n~!Ek~}55`k^t`5-rvaO-kk@Au+k9{qjxbpJ20feVU+IYv8?e?$%-E#Hn)VR1dVK5g8EOIwYxea|m~^}#a6)g)gcA+b zU_auIhC#G1oc)nM7&#+vk2#(rnULLsap3r&BS%b)g+qPL1~TFY-D=f==RxGq$Pv*c z2j-_-_K-ar?v6pkA zwLXeG%6Mub?#)hx>tLPKc3l+ex@ieSf{?V#nTUctmYO?3w4Is<-Z)6j0GQJXXC8GW 
zdm_{7d!AsK#EzAkBgR?(*yI5Bb8}lL$s#4g{*sypm_vUjwe$5qYPCkha|&Rsf(} z1YxX+8Z?^KAXQ@PtHdU}4dVg9*~N1VzDw}F53l&hz{rUq`Y>j8$)S!>L-jRH#SxjE zi6MGp@;iv!7shEV(IJLRh%n<*Equ#_SUUKeK->^bmSmY)u@01_7sgYF#V^P&v`}By zXc;3cHugwj#g<5=eORG(Yz+ym;QUaZn=n_6P1w0Amkw)lZBG-whq-!e%nARKT7_Py z(<)tht{>Nu63jJd4LIs2G}?$uK7rXXps){@6Z%*2w-!xl_&DfsB3@E@)E0VxerQLfhXuM+lZtH1)!05f zHV2=9CV(ieqeijyOc(DdX(vJ28b~Xm|0k^E=>Lrvj?kYvLa&RHNEO!sz0S`g74`xB zDb)w`^z?H>SwFLLDaQx<*AI`+HOMFb`b@Vuec(Dr&mU14LPx528K?|iQ}Un>RGHS`SvA)N*>5~AgT|{1kF4ezgEyM+w%}b! z+RyaIn^c3m`Q~OyT#n~$R*t4}GL^kqy_X+fOp&u~Oohc~tGK-7gpqWH(G-&3BAI4# zirKWtjs1-$6G*fS)1@5UcD82!*vc|)k?_ID7tX*7vz$Gi2GXAd3{tTtAhX<7Nwr9S z(Rr}QYCg>S(f z0T~~yAf(5ovk%!ZyHuhmyN$jtv|roUyyrgL_~ibFA8+38kZ()K?}r^SJN|WNX3Dpf5I(`sb z2IGu9iT?QM#wQ#1K7N1Jtlss6V4w|de*?CMoo;-CaROlVJ9hx+`T|+?SBs8wM^UeL z#08uI9Xi#-xs%P_L>U1KQ#%LMF;VC`KLJAdNoa6*bJka0j3$=@2G=d3%o)^$J)R1w zE>;ipV^6$+gey4;nFDtuKe(+7KjG1U39umg%ENnoAe~3-;1#uQWoq4;y$xGJcqq{j z44!lX2r{X}E;e`A*hdR>XWf@f1D^T+`ll@s4dp)E;YXKZ$KbD_&PR&RN}qp5-|k+V zwI62prV~!b+l<4p;?C@J2CNWV-eSG$%=k-au7K?`IM`eG<7z$!o&UM03mzYgs)=&d z^^UltdjBXLU1@A7C!o;5_1K`04o5_5YN<)4!|Mzp$$0FGiF6 z8dmuR&~{p>ojaWOU?N>}vuNpNn81Bq&h|MtCaoMzVR5bI_$A~i