Add plasmidid module (#410)

* Adding plasmidid module

* Adding test for plasmidid module

* Update plasmidid version to 1.6.4

* Adding contigs.fasta file

* Update tests

* Adding output files on test.yml
This commit is contained in:
Jose Espinosa-Carrasco 2021-04-08 23:44:30 +02:00 committed by GitHub
parent be390a25e5
commit f90dde0d88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 297 additions and 2 deletions

View file

@ -0,0 +1,60 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the lower-case tool name from a fully qualified process name ($task.process)
 */
def getSoftwareName(task_process) {
    // Keep only the last ':'-separated element of the process name
    def process_name = task_process.tokenize(':')[-1]
    // The tool name is whatever precedes the first underscore
    def tool_name = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
/*
 * Build the Groovy Map of options understood by nf-core modules, filling in a
 * default for every key the caller left unset
 */
def initOptions(Map args) {
    // Every key except publish_files falls back to a default when the supplied
    // value is missing or Groovy-false; publish_files is copied through unchanged
    // (so it stays null when absent)
    Map options = [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        args3         : args.args3 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,
        suffix        : args.suffix ?: ''
    ]
    return options
}
/*
 * Tidy up and join elements of a list to return a path string.
 *
 * Null, empty and whitespace-only entries are dropped; every remaining entry is
 * trimmed and stripped of leading and trailing slashes before being joined with '/'.
 */
def getPathFromList(path_list) {
    // Groovy truth on the trimmed value rejects null, '' and blank strings in one check.
    // The previous `!item?.trim().isEmpty()` threw a NullPointerException on null
    // entries, because `?.` only guards the trim() call, not the chained isEmpty().
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then remove leading and trailing slashes so the joined
    // result has exactly one '/' between entries
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
/*
 * Work out where a module output file should be published.
 * Returns the relative publish path, or null when the file is not selected.
 */
def saveFiles(Map args) {
    // Version files are never given a publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)

    // Base directory: the module option wins over the caller-supplied default
    def dir_parts = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        dir_parts << args.publish_id
    }

    // No filter supplied: every file goes under the base directory
    if (opts.publish_files == null) {
        return "${getPathFromList(dir_parts)}/$args.filename"
    }

    // Filter map supplied: only files whose name ends with one of its keys are
    // selected, and they go into the sub-directory given by the first matching entry
    if (opts.publish_files instanceof Map) {
        def hit = opts.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (hit != null) {
            return "${getPathFromList(dir_parts + [ hit.value ])}/$args.filename"
        }
    }

    // Any other publish_files value, or no matching key: nothing selected
    return null
}

View file

@ -0,0 +1,50 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
// Default module options: an empty map, so every option falls back to its initOptions default
// unless the including script supplies `options` (e.g. via addParams on the include statement)
params.options = [:]
// Normalised options map (defaults filled in by initOptions); read by the process script below
options = initOptions(params.options)
// Runs plasmidID for one sample and emits its report files and result sub-directories
process PLASMIDID {
tag "$meta.id"
label 'process_medium'
// Publish under params.outdir; saveFiles() decides the sub-path for each file, using the
// tool name derived from the process name and the sample id
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
// Software environment: conda package when params.enable_conda is set, otherwise a container
conda (params.enable_conda ? 'bioconda::plasmidid=1.6.4' : null)
// Use the Singularity image unless the engine is not singularity or the user asked
// for the docker container to be pulled instead
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container 'https://depot.galaxyproject.org/singularity/plasmidid:1.6.4--hdfd78af_3'
} else {
container 'quay.io/biocontainers/plasmidid:1.6.4--hdfd78af_3'
}
// Inputs: a [ meta, scaffold ] tuple (scaffold is passed to plasmidID as -c)
// and a fasta file (passed to plasmidID as -d)
input:
tuple val(meta), path(scaffold)
path fasta
// Outputs: everything is collected from the ${prefix}/ directory created by the script,
// plus the version file written in the task working directory
output:
tuple val(meta), path("${prefix}/*final_results.html"), emit: html
tuple val(meta), path("${prefix}/*final_results.tab") , emit: tab
tuple val(meta), path("${prefix}/images/") , emit: images
tuple val(meta), path("${prefix}/logs/") , emit: logs
tuple val(meta), path("${prefix}/data/") , emit: data
tuple val(meta), path("${prefix}/database/") , emit: database
tuple val(meta), path("${prefix}/fasta_files/") , emit: fasta_files
tuple val(meta), path("${prefix}/kmer/") , emit: kmer
path '*.version.txt' , emit: version
// Script: `prefix` is assigned without `def` and is the same name the output paths above use.
// It is the sample id, with options.suffix appended when one is set.
// The `mv` lifts the sample directory out of NO_GROUP/ so it matches the declared output paths
// NOTE(review): NO_GROUP is presumably plasmidID's default group directory - confirm against the tool docs
script:
def software = getSoftwareName(task.process)
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
"""
plasmidID \\
-d $fasta \\
-s $prefix \\
-c $scaffold \\
$options.args \\
-o .
mv NO_GROUP/$prefix ./$prefix
echo \$(plasmidID --version 2>&1) > ${software}.version.txt
"""
}

View file

@ -0,0 +1,76 @@
name: plasmidid
description: assembles bacterial plasmids
keywords:
- assembly
- plasmid
- bacterial
tools:
- plasmidid:
description: Pipeline for plasmid identification and reconstruction
homepage: https://github.com/BU-ISCIII/plasmidID/wiki
documentation: https://github.com/BU-ISCIII/plasmidID#readme
tool_dev_url: https://github.com/BU-ISCIII/plasmidID
doi: ""
licence: ['GPL v3']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- scaffold:
type: file
description: |
Fasta file containing scaffold
- fasta:
type: file
description: FASTA reference file
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: html file with results rendered
pattern: "*.{html}"
- tab:
type: file
description: Results in a tabular file
pattern: "*.{tab}"
- images:
type: directory
description: Directory containing the images produced by plasmidid
pattern: "images"
- logs:
type: directory
description: Directory containing the logs produced by plasmidid
pattern: "logs"
- data:
type: directory
description: Directory containing the data produced by plasmidid
pattern: "data"
- database:
type: directory
description: Directory containing the database produced by plasmidid
pattern: "database"
- fasta_files:
type: directory
description: Directory containing the fasta files produced by plasmidid
pattern: "fasta_files"
- kmer:
type: directory
description: Directory containing the kmer files produced by plasmidid
pattern: "kmer"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -354,6 +354,10 @@ picard/mergesamfiles:
- software/picard/mergesamfiles/**
- tests/software/picard/mergesamfiles/**
plasmidid:
- software/plasmidid/**
- tests/software/plasmidid/**
preseq/lcextrap:
- software/preseq/lcextrap/**
- tests/software/preseq/lcextrap/**

View file

@ -23,11 +23,14 @@ params {
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
test_wig_gz = "${test_data_dir}/genomics/sarscov2/genome/gcwiggle/test.wig.gz"
test_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/test.fas"
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
contigs_fasta = "${test_data_dir}/genomics/sarscov2/genome/contigs.fasta"
}
'illumina' {
test_single_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test_single_end.bam"

View file

@ -25,6 +25,7 @@ When adding a new module, please check carefully whether the data necessary for
* 'test_genomic.dict': GATK dict for 'test_genomic.fasta'
* 'test_genomic.fasta.fai': fasta index for 'test_genomic.fasta'
* 'test_cds_from_genomic.fasta': coding sequencing from MT192765.1 genome (transcripts)
* 'contigs.fasta': contigs file obtained by running Minia on the sarscov2 paired-end sequencing read files (see below)
* fastq
* 'test_{1,2}.fastq.gz' sarscov2 paired-end sequencing reads
* 'test_{1,2}.2.fastq.gz': copies of the above reads

View file

@ -0,0 +1,26 @@
>0 LN:i:168 KC:i:593 km:f:4.300
ACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCA
>1 LN:i:187 KC:i:376 km:f:2.400
GGTCAAGGTTAATATAGGCATTAACAATGAATAATAAGAATCTACAACAGGAACTCCACTACCTGGCGTGGTTTGTATGAAATCACCGAAATCATACCAGTTACCATTGAGATCTTGATTATCTAATGTCAGTACACCAACAATACCAGCATTTCGCATGGCATCACAGAATTGTACTGTTTTTAAC
>2 LN:i:162 KC:i:409 km:f:3.100
CCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG
>3 LN:i:207 KC:i:548 km:f:3.100
TCATAGATGTCAACTCAAAGCCATGTGCCCATAAGACAAATACGACTCTGTCAGAGAGATTTTTAAGTGTGTCACTTAACATTTGTACAATCTTTATACGCACTACATTCCAAGGAAGTCCTTTGTACATAAGTGGTATGAGGTGTTTAAATTGATCTCCAGGCGGTGGTTTAGCACTAACTCTGGAAAAATCTGTATTATTAGGTG
>4 LN:i:165 KC:i:472 km:f:3.500
TCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAAC
>5 LN:i:174 KC:i:547 km:f:3.800
TCACTTTGAATTTATCAAAACACTCTACACGAGCACGTGCAGGTATAATTCTACTACATTTATCTATAGGCAAATATTTTAATGCCTTCTCACATAGTGCATCAACAGCGGCATGAGAGCAAGCTGTATACACTATGCGAGCAGAAGGGTAGTAGAGAGCTAGGCCAATAGCAA
>6 LN:i:279 KC:i:1037 km:f:4.166
TAATTGCAGATGAAACATCATGCGTGATAACACCCTTATAAAACATTTTAAAGCATTGAGCTGATTTGTCTTTATGTGCTTTAAGCTTATTATCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGTGGTGCAGGTAATTGAGCAGGGTCGCCAATGTACACAT
>7 LN:i:206 KC:i:492 km:f:2.800
TATATGTTTATAGTGACCACACTGGTAATTACCAGTGTACTCACTAGCACAAGTAAATGTACCATGCTTAAGTTCATACTGAGCAGGTGGTGCTGACATCATAACAAAAGGTGACTCCTGTTGTACTAGATATTTTGTAGCTTGTTTACCACACGTACAAGGTATCTGAACACCTTTCTTAAATTGTTCATAAGAAAGTGTGCCCA
>8 LN:i:318 KC:i:1136 km:f:3.944
GCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTG
>9 LN:i:198 KC:i:386 km:f:2.300
ATATTGCAGCAGTACGCACACAATCGAAGCGCAGTAAGGATGGCTAGTGTAACTAGCAAGAATACCACGAAAGCAAGAAAAAGAAGTACGCTATTAACTATTAACGTACCTGTCTCTTCCGAAACGAATGAGTACATAAGTTCGTACTCATCAGCTTGTGCTTACAAAGGCACGCTAGTAGTCGTCGTCGGTTCATCA
>10 LN:i:178 KC:i:547 km:f:3.700
TGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTT
>11 LN:i:170 KC:i:644 km:f:4.600
GTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCA
>12 LN:i:151 KC:i:290 km:f:2.400
AATTTAGTGTCAACACTGAGGTGTGTAGGTGCCTGTGTAGGATGTAACCCAGTGATTACCTTACTACAATCTTTAAAGAGTCCTGTTACATTTTCAGCTTGTAAAGTTGCCACATTCCTACGTGGAATTTCAAGACTTGTAAATTGCAACT

View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PLASMIDID } from '../../../software/plasmidid/main.nf' addParams ( options: ['args' : '-k 0.8'] )
// Test workflow: run PLASMIDID on the sarscov2 contigs with the sarscov2 genome FASTA
workflow test_plasmidid {

    // Both test files are looked up under the sarscov2 genome section of params.test_data
    genome_data = params.test_data['sarscov2']['genome']

    // First input: [ meta map, contigs FASTA ]
    scaffold_input = [
        [ id:'test' ],
        file(genome_data['contigs_fasta'], checkIfExists: true)
    ]

    // Second input: genome FASTA
    reference = file(genome_data['genome_fasta'], checkIfExists: true)

    PLASMIDID ( scaffold_input, reference )
}

View file

@ -0,0 +1,59 @@
- name: plasmidid
command: nextflow run ./tests/software/plasmidid -entry test_plasmidid -c tests/config/nextflow.config
tags:
- plasmidid
files:
- path: output/plasmidid/test/data/database_reconstruct_test.length
md5sum: a57c401f27ae5133823fb09fb21c8a3c
- path: output/plasmidid/test/data/test.coverage_adapted_clustered_ac
md5sum: 3a9ea6d336e113a74d7fdca5e7b623fc
- path: output/plasmidid/test/data/test.err
md5sum: 1eabbbcb19d8886182f78b86fc50884c
- path: output/plasmidid/test/data/test.fna
md5sum: 78328536870d04afc54a4dfaf103cac8
- path: output/plasmidid/test/data/test.gbk
md5sum: acc40ebd7864b84fc14e5ad22e30f83f
- path: output/plasmidid/test/data/test.gff
md5sum: 0dd7015f77e2fc3ecaa978362f7334cb
- path: output/plasmidid/test/data/test.karyotype_individual.txt
md5sum: aaf0f5bfe297fb9846ace761ff018d22
- path: output/plasmidid/test/data/test.karyotype_summary.txt
md5sum: aaf0f5bfe297fb9846ace761ff018d22
- path: output/plasmidid/test/data/test.plasmids.blast
md5sum: b089b7a073f4e48fabf0949168581b4c
- path: output/plasmidid/test/data/test.plasmids.blast.links
md5sum: 3528f4e0a046826a9b7f916838b086c6
- path: output/plasmidid/test/data/test.plasmids.complete
md5sum: 8d30d205c471ee3c8151b4c3d90e753b
- path: output/plasmidid/test/database/test.fna
md5sum: 6b843fe652b4369addb382f61952c3dd
- path: output/plasmidid/test/database/test.gbk
md5sum: d2e24369b999bc6a0dae9b7612edb839
- path: output/plasmidid/test/database/test.gff
md5sum: 7e65da147d0a413020b0d92b7b03ffcd
- path: output/plasmidid/test/fasta_files/MT192765.1_term.fasta
md5sum: 2387d7d9c861e6f6e7d9f9395f9bad5c
- path: output/plasmidid/test/images/test_individual.circos.conf
md5sum: f74467ab77232e2b342e2bd408897b12
- path: output/plasmidid/test/images/test_MT192765.1_individual.circos.conf
md5sum: 548443ba0a68065a69383fe32b226780
- path: output/plasmidid/test/images/test_summary.circos.conf
md5sum: e59dc3b77ee610a48b79230da705aba0
- path: output/plasmidid/test/images/test_MT192765.1.png
- path: output/plasmidid/test/images/test_summary.png
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.clusters.tab
md5sum: cf75e4418631796bb4675129b575915d
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.clusters.tab
md5sum: cf75e4418631796bb4675129b575915d
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.representative.fasta
md5sum: 483f4a5dfe60171c86ee9b7e6dff908b
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.representative.fasta.blast.tmp.ndb
md5sum: f67f8be1e00eb7d3512869071fb4c2c2
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.fasta
md5sum: c7fe0e9e9a94d98d7f85a7be99c032fd
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.mash.distances.tab
md5sum: fd5408c3c2f8f68dc01e0123bb6a9079
- path: output/plasmidid/test/logs/draw_circos_images.log
- path: output/plasmidid/test/logs/plasmidID.log
- path: output/plasmidid/test/test_final_results.html
- path: output/plasmidid/test/test_final_results.tab