mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-10 20:23:10 +00:00
Add plasmidid module (#410)
* Adding plasmidid module * Adding test for plasmidid module * Update plasmidid version to 1.6.4 * Adding contigs.fasta file * Update tests * Adding output files on test.yml
This commit is contained in:
parent
be390a25e5
commit
f90dde0d88
9 changed files with 297 additions and 2 deletions
60
software/plasmidid/functions.nf
Normal file
60
software/plasmidid/functions.nf
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* -----------------------------------------------------
|
||||
* Utility functions used in nf-core DSL2 module files
|
||||
* -----------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Extract name of software tool from process name using $task.process
|
||||
*/
|
||||
def getSoftwareName(task_process) {
    // Process names are ':'-separated scopes; only the final scope is used
    def scoped_parts = task_process.tokenize(':')
    def process_name = scoped_parts[-1]
    // The tool name is whatever precedes the first '_' of that final scope
    def name_parts = process_name.tokenize('_')
    return name_parts[0].toLowerCase()
}
|
||||
|
||||
/*
|
||||
* Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
|
||||
*/
|
||||
def initOptions(Map args) {
    // Build a complete options map from a possibly partial (or null) input map.
    //   args - map of user-supplied options; may be null or miss any key
    // Returns a new map with the keys args, args2, args3, publish_by_id,
    // publish_dir, publish_files and suffix always present.
    // A null input is treated as "no options given" instead of raising a
    // NullPointerException on the first property access.
    def given = args ?: [:]
    Map options = [:]
    options.args          = given.args ?: ''
    options.args2         = given.args2 ?: ''
    options.args3         = given.args3 ?: ''
    options.publish_by_id = given.publish_by_id ?: false
    options.publish_dir   = given.publish_dir ?: ''
    // Deliberately left without a default: saveFiles distinguishes null from other values
    options.publish_files = given.publish_files
    options.suffix        = given.suffix ?: ''
    return options
}
|
||||
|
||||
/*
|
||||
* Tidy up and join elements of a list to return a path string
|
||||
*/
|
||||
def getPathFromList(path_list) {
    // Join the usable elements of path_list into a single '/'-separated path.
    //   path_list - list of path fragments; null, empty and blank entries are dropped
    // Returns the joined path string (empty string when nothing usable remains).
    // Groovy truth treats both null and '' as false, so null entries are skipped
    // rather than triggering a NullPointerException on isEmpty().
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim surrounding whitespace, then strip leading and trailing slashes
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
|
||||
|
||||
/*
|
||||
* Function to save/publish module results
|
||||
*/
|
||||
def saveFiles(Map args) {
    // Work out where a result file should be published.
    // Reads args.filename, args.options, args.publish_dir and args.publish_id.
    // Returns the relative destination path, or null when the file is not published.

    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def ioptions = initOptions(args.options)

    // Base directory: the option value wins over the caller-supplied default
    def base_parts = [ ioptions.publish_dir ?: args.publish_dir ]
    if (ioptions.publish_by_id) {
        base_parts << args.publish_id
    }

    // No file filter configured: publish everything under the base directory
    if (ioptions.publish_files == null) {
        return "${getPathFromList(base_parts)}/$args.filename"
    }

    // Filter given as a map of suffix -> sub-directory: the first matching suffix decides
    if (ioptions.publish_files instanceof Map) {
        def matched = ioptions.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            def target_parts = base_parts + [ matched.value ]
            return "${getPathFromList(target_parts)}/$args.filename"
        }
    }

    // Any other filter value, or no matching suffix: do not publish
    return null
}
|
50
software/plasmidid/main.nf
Normal file
50
software/plasmidid/main.nf
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Import generic module functions
|
||||
include { initOptions; saveFiles; getSoftwareName } from './functions'
|
||||
|
||||
// Default to an empty options map when none has been set for this module
params.options = [:]
|
||||
// Options with defaults filled in by initOptions (imported from ./functions); read by the process script below
options = initOptions(params.options)
|
||||
|
||||
// Runs plasmidID for one sample (scaffold file plus a FASTA file) and emits the
// result files and directories found under the sample prefix, plus a version file.
process PLASMIDID {
|
||||
tag "$meta.id"
|
||||
label 'process_medium'
|
||||
// Publish under params.outdir; saveFiles computes each file's destination path
publishDir "${params.outdir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
|
||||
|
||||
// The conda package is only set when params.enable_conda is truthy
conda (params.enable_conda ? 'bioconda::plasmidid=1.6.4' : null)
|
||||
// Singularity image from the Galaxy depot unless params.singularity_pull_docker_container is set;
// every other case uses the quay.io biocontainers image
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
||||
container 'https://depot.galaxyproject.org/singularity/plasmidid:1.6.4--hdfd78af_3'
|
||||
} else {
|
||||
container 'quay.io/biocontainers/plasmidid:1.6.4--hdfd78af_3'
|
||||
}
|
||||
|
||||
// meta: sample map (meta.id is used for the tag, publish id and prefix); scaffold is passed to -c, fasta to -d
input:
|
||||
tuple val(meta), path(scaffold)
|
||||
path fasta
|
||||
|
||||
// All sample outputs are collected from the ${prefix} directory that the script's mv creates
output:
|
||||
tuple val(meta), path("${prefix}/*final_results.html"), emit: html
|
||||
tuple val(meta), path("${prefix}/*final_results.tab") , emit: tab
|
||||
tuple val(meta), path("${prefix}/images/") , emit: images
|
||||
tuple val(meta), path("${prefix}/logs/") , emit: logs
|
||||
tuple val(meta), path("${prefix}/data/") , emit: data
|
||||
tuple val(meta), path("${prefix}/database/") , emit: database
|
||||
tuple val(meta), path("${prefix}/fasta_files/") , emit: fasta_files
|
||||
tuple val(meta), path("${prefix}/kmer/") , emit: kmer
|
||||
path '*.version.txt' , emit: version
|
||||
|
||||
script:
|
||||
def software = getSoftwareName(task.process)
|
||||
// prefix is meta.id plus the optional options.suffix
// NOTE(review): assigned without 'def', presumably so the output: paths above can resolve ${prefix} - confirm
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||
// plasmidID writes to '.'; the script then moves NO_GROUP/<prefix> to ./<prefix> and records the tool version
// NOTE(review): NO_GROUP is presumably plasmidID's default group directory - confirm against the tool docs
"""
|
||||
plasmidID \\
|
||||
-d $fasta \\
|
||||
-s $prefix \\
|
||||
-c $scaffold \\
|
||||
$options.args \\
|
||||
-o .
|
||||
|
||||
mv NO_GROUP/$prefix ./$prefix
|
||||
echo \$(plasmidID --version 2>&1) > ${software}.version.txt
|
||||
"""
|
||||
}
|
76
software/plasmidid/meta.yml
Normal file
76
software/plasmidid/meta.yml
Normal file
|
@ -0,0 +1,76 @@
|
|||
name: plasmidid
|
||||
description: assembles bacterial plasmids
|
||||
keywords:
|
||||
- assembly
|
||||
- plasmid
|
||||
- bacterial
|
||||
tools:
|
||||
- plasmidid:
|
||||
description: Pipeline for plasmid identification and reconstruction
|
||||
homepage: https://github.com/BU-ISCIII/plasmidID/wiki
|
||||
documentation: https://github.com/BU-ISCIII/plasmidID#readme
|
||||
tool_dev_url: https://github.com/BU-ISCIII/plasmidID
|
||||
doi: ""
|
||||
licence: ['GPL v3']
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- scaffold:
|
||||
type: file
|
||||
description: |
|
||||
Fasta file containing scaffold
|
||||
- fasta:
|
||||
type: file
|
||||
description: FASTA reference file
|
||||
pattern: "*.{fasta,fa}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- html:
|
||||
type: file
|
||||
description: html file with results rendered
|
||||
pattern: "*.{html}"
|
||||
- tab:
|
||||
type: file
|
||||
description: Results in a tabular file
|
||||
pattern: "*.{tab}"
|
||||
- images:
|
||||
type: directory
|
||||
description: Directory containing the images produced by plasmidid
|
||||
pattern: "images"
|
||||
- logs:
|
||||
type: directory
|
||||
description: Directory containing the logs produced by plasmidid
|
||||
pattern: "logs"
|
||||
- data:
|
||||
type: directory
|
||||
description: Directory containing the data produced by plasmidid
|
||||
pattern: "data"
|
||||
- database:
|
||||
type: directory
|
||||
description: Directory containing the database produced by plasmidid
|
||||
pattern: "database"
|
||||
- fasta_files:
|
||||
type: directory
|
||||
description: Directory containing the fasta files produced by plasmidid
|
||||
pattern: "fasta_files"
|
||||
- kmer:
|
||||
type: directory
|
||||
description: Directory containing the kmer files produced by plasmidid
|
||||
pattern: "kmer"
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.{version.txt}"
|
||||
|
||||
authors:
|
||||
- "@joseespinosa"
|
||||
- "@drpatelh"
|
|
@ -354,6 +354,10 @@ picard/mergesamfiles:
|
|||
- software/picard/mergesamfiles/**
|
||||
- tests/software/picard/mergesamfiles/**
|
||||
|
||||
plasmidid:
|
||||
- software/plasmidid/**
|
||||
- tests/software/plasmidid/**
|
||||
|
||||
preseq/lcextrap:
|
||||
- software/preseq/lcextrap/**
|
||||
- tests/software/preseq/lcextrap/**
|
||||
|
|
|
@ -23,11 +23,14 @@ params {
|
|||
|
||||
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
|
||||
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
|
||||
|
||||
|
||||
test_wig_gz = "${test_data_dir}/genomics/sarscov2/genome/gcwiggle/test.wig.gz"
|
||||
|
||||
|
||||
test_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/test.fas"
|
||||
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
||||
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
||||
|
||||
contigs_fasta = "${test_data_dir}/genomics/sarscov2/genome/contigs.fasta"
|
||||
}
|
||||
'illumina' {
|
||||
test_single_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test_single_end.bam"
|
||||
|
|
|
@ -25,6 +25,7 @@ When adding a new module, please check carefully whether the data necessary for
|
|||
* 'test_genomic.dict': GATK dict for 'test_genomic.fasta'
|
||||
* 'test_genomic.fasta.fai': fasta index for 'test_genomic.fasta'
|
||||
* 'test_cds_from_genomic.fasta': coding sequences from MT192765.1 genome (transcripts)
|
||||
* 'contigs.fasta': contigs file obtained by running Minia on the sarscov2 paired-end sequencing read files (see below)
|
||||
* fastq
|
||||
* 'test_{1,2}.fastq.gz' sarscov2 paired-end sequencing reads
|
||||
* 'test_{1,2}.2.fastq.gz': copies of the above reads
|
||||
|
|
26
tests/data/genomics/sarscov2/genome/contigs.fasta
Normal file
26
tests/data/genomics/sarscov2/genome/contigs.fasta
Normal file
|
@ -0,0 +1,26 @@
|
|||
>0 LN:i:168 KC:i:593 km:f:4.300
|
||||
ACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCA
|
||||
>1 LN:i:187 KC:i:376 km:f:2.400
|
||||
GGTCAAGGTTAATATAGGCATTAACAATGAATAATAAGAATCTACAACAGGAACTCCACTACCTGGCGTGGTTTGTATGAAATCACCGAAATCATACCAGTTACCATTGAGATCTTGATTATCTAATGTCAGTACACCAACAATACCAGCATTTCGCATGGCATCACAGAATTGTACTGTTTTTAAC
|
||||
>2 LN:i:162 KC:i:409 km:f:3.100
|
||||
CCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAG
|
||||
>3 LN:i:207 KC:i:548 km:f:3.100
|
||||
TCATAGATGTCAACTCAAAGCCATGTGCCCATAAGACAAATACGACTCTGTCAGAGAGATTTTTAAGTGTGTCACTTAACATTTGTACAATCTTTATACGCACTACATTCCAAGGAAGTCCTTTGTACATAAGTGGTATGAGGTGTTTAAATTGATCTCCAGGCGGTGGTTTAGCACTAACTCTGGAAAAATCTGTATTATTAGGTG
|
||||
>4 LN:i:165 KC:i:472 km:f:3.500
|
||||
TCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAAC
|
||||
>5 LN:i:174 KC:i:547 km:f:3.800
|
||||
TCACTTTGAATTTATCAAAACACTCTACACGAGCACGTGCAGGTATAATTCTACTACATTTATCTATAGGCAAATATTTTAATGCCTTCTCACATAGTGCATCAACAGCGGCATGAGAGCAAGCTGTATACACTATGCGAGCAGAAGGGTAGTAGAGAGCTAGGCCAATAGCAA
|
||||
>6 LN:i:279 KC:i:1037 km:f:4.166
|
||||
TAATTGCAGATGAAACATCATGCGTGATAACACCCTTATAAAACATTTTAAAGCATTGAGCTGATTTGTCTTTATGTGCTTTAAGCTTATTATCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGTGGTGCAGGTAATTGAGCAGGGTCGCCAATGTACACAT
|
||||
>7 LN:i:206 KC:i:492 km:f:2.800
|
||||
TATATGTTTATAGTGACCACACTGGTAATTACCAGTGTACTCACTAGCACAAGTAAATGTACCATGCTTAAGTTCATACTGAGCAGGTGGTGCTGACATCATAACAAAAGGTGACTCCTGTTGTACTAGATATTTTGTAGCTTGTTTACCACACGTACAAGGTATCTGAACACCTTTCTTAAATTGTTCATAAGAAAGTGTGCCCA
|
||||
>8 LN:i:318 KC:i:1136 km:f:3.944
|
||||
GCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTG
|
||||
>9 LN:i:198 KC:i:386 km:f:2.300
|
||||
ATATTGCAGCAGTACGCACACAATCGAAGCGCAGTAAGGATGGCTAGTGTAACTAGCAAGAATACCACGAAAGCAAGAAAAAGAAGTACGCTATTAACTATTAACGTACCTGTCTCTTCCGAAACGAATGAGTACATAAGTTCGTACTCATCAGCTTGTGCTTACAAAGGCACGCTAGTAGTCGTCGTCGGTTCATCA
|
||||
>10 LN:i:178 KC:i:547 km:f:3.700
|
||||
TGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTT
|
||||
>11 LN:i:170 KC:i:644 km:f:4.600
|
||||
GTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCA
|
||||
>12 LN:i:151 KC:i:290 km:f:2.400
|
||||
AATTTAGTGTCAACACTGAGGTGTGTAGGTGCCTGTGTAGGATGTAACCCAGTGATTACCTTACTACAATCTTTAAAGAGTCCTGTTACATTTTCAGCTTGTAAAGTTGCCACATTCCTACGTGGAATTTCAAGACTTGTAAATTGCAACT
|
16
tests/software/plasmidid/main.nf
Normal file
16
tests/software/plasmidid/main.nf
Normal file
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { PLASMIDID } from '../../../software/plasmidid/main.nf' addParams ( options: ['args' : '-k 0.8'] )
|
||||
|
||||
// Test entry workflow: runs PLASMIDID on the sarscov2 contigs FASTA together with the sarscov2 genome FASTA
workflow test_plasmidid {
|
||||
|
||||
// First process input: [ meta, scaffold ] tuple; the file must exist (checkIfExists)
contigs = [ [ id:'test' ], // meta map
|
||||
file(params.test_data['sarscov2']['genome']['contigs_fasta'], checkIfExists: true)
|
||||
]
|
||||
|
||||
// Second process input: the FASTA file handed to PLASMIDID as 'fasta'
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
|
||||
PLASMIDID ( contigs, fasta )
|
||||
}
|
59
tests/software/plasmidid/test.yml
Normal file
59
tests/software/plasmidid/test.yml
Normal file
|
@ -0,0 +1,59 @@
|
|||
- name: plasmidid
|
||||
command: nextflow run ./tests/software/plasmidid -entry test_plasmidid -c tests/config/nextflow.config
|
||||
tags:
|
||||
- plasmidid
|
||||
files:
|
||||
- path: output/plasmidid/test/data/database_reconstruct_test.length
|
||||
md5sum: a57c401f27ae5133823fb09fb21c8a3c
|
||||
- path: output/plasmidid/test/data/test.coverage_adapted_clustered_ac
|
||||
md5sum: 3a9ea6d336e113a74d7fdca5e7b623fc
|
||||
- path: output/plasmidid/test/data/test.err
|
||||
md5sum: 1eabbbcb19d8886182f78b86fc50884c
|
||||
- path: output/plasmidid/test/data/test.fna
|
||||
md5sum: 78328536870d04afc54a4dfaf103cac8
|
||||
- path: output/plasmidid/test/data/test.gbk
|
||||
md5sum: acc40ebd7864b84fc14e5ad22e30f83f
|
||||
- path: output/plasmidid/test/data/test.gff
|
||||
md5sum: 0dd7015f77e2fc3ecaa978362f7334cb
|
||||
- path: output/plasmidid/test/data/test.karyotype_individual.txt
|
||||
md5sum: aaf0f5bfe297fb9846ace761ff018d22
|
||||
- path: output/plasmidid/test/data/test.karyotype_summary.txt
|
||||
md5sum: aaf0f5bfe297fb9846ace761ff018d22
|
||||
- path: output/plasmidid/test/data/test.plasmids.blast
|
||||
md5sum: b089b7a073f4e48fabf0949168581b4c
|
||||
- path: output/plasmidid/test/data/test.plasmids.blast.links
|
||||
md5sum: 3528f4e0a046826a9b7f916838b086c6
|
||||
- path: output/plasmidid/test/data/test.plasmids.complete
|
||||
md5sum: 8d30d205c471ee3c8151b4c3d90e753b
|
||||
- path: output/plasmidid/test/database/test.fna
|
||||
md5sum: 6b843fe652b4369addb382f61952c3dd
|
||||
- path: output/plasmidid/test/database/test.gbk
|
||||
md5sum: d2e24369b999bc6a0dae9b7612edb839
|
||||
- path: output/plasmidid/test/database/test.gff
|
||||
md5sum: 7e65da147d0a413020b0d92b7b03ffcd
|
||||
- path: output/plasmidid/test/fasta_files/MT192765.1_term.fasta
|
||||
md5sum: 2387d7d9c861e6f6e7d9f9395f9bad5c
|
||||
- path: output/plasmidid/test/images/test_individual.circos.conf
|
||||
md5sum: f74467ab77232e2b342e2bd408897b12
|
||||
- path: output/plasmidid/test/images/test_MT192765.1_individual.circos.conf
|
||||
md5sum: 548443ba0a68065a69383fe32b226780
|
||||
- path: output/plasmidid/test/images/test_summary.circos.conf
|
||||
md5sum: e59dc3b77ee610a48b79230da705aba0
|
||||
- path: output/plasmidid/test/images/test_MT192765.1.png
|
||||
- path: output/plasmidid/test/images/test_summary.png
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.clusters.tab
|
||||
md5sum: cf75e4418631796bb4675129b575915d
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.clusters.tab
|
||||
md5sum: cf75e4418631796bb4675129b575915d
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.representative.fasta
|
||||
md5sum: 483f4a5dfe60171c86ee9b7e6dff908b
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.0.5.representative.fasta.blast.tmp.ndb
|
||||
md5sum: f67f8be1e00eb7d3512869071fb4c2c2
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.fasta
|
||||
md5sum: c7fe0e9e9a94d98d7f85a7be99c032fd
|
||||
- path: output/plasmidid/test/kmer/database.filtered_0.8_term.mash.distances.tab
|
||||
md5sum: fd5408c3c2f8f68dc01e0123bb6a9079
|
||||
- path: output/plasmidid/test/logs/draw_circos_images.log
|
||||
- path: output/plasmidid/test/logs/plasmidID.log
|
||||
- path: output/plasmidid/test/test_final_results.html
|
||||
- path: output/plasmidid/test/test_final_results.tab
|
Loading…
Reference in a new issue