add module for panaroo, fix pirate tests (#1444)

* add module for panaroo

* update pirate test yaml

* update panaroo test yaml

* update pirate test yaml md5s

* Update main.nf

* Update main.nf

* pin bioperl version
This commit is contained in:
Robert A. Petit III 2022-03-24 20:11:04 -06:00 committed by GitHub
parent 6806cd1de9
commit d16f1ea4d7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 186 additions and 38 deletions

View file

@ -0,0 +1,36 @@
// Runs Panaroo on a collection of GFF annotations.
//
// Input:
//   meta - Groovy map with sample information; meta.id is used as the task tag
//   gff  - GFF file(s) handed to `panaroo -i`
//
// Output:
//   results  - every file Panaroo writes into the results/ directory
//   aln      - results/core_gene_alignment.aln; declared optional, so the
//              task does not fail when the file is absent
//   versions - versions.yml recording the Panaroo version that ran
process PANAROO_RUN {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::panaroo=1.2.9" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/panaroo:1.2.9--pyhdfd78af_0':
        'quay.io/biocontainers/panaroo:1.2.9--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(gff)

    output:
    tuple val(meta), path("results/*")                                      , emit: results
    tuple val(meta), path("results/core_gene_alignment.aln"), optional: true, emit: aln
    path "versions.yml"                                                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options are injected through ext.args in the config.
    def args = task.ext.args ?: ''
    """
    panaroo \\
        $args \\
        -t $task.cpus \\
        -o results \\
        -i $gff

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        panaroo: \$(echo \$(panaroo --version 2>&1) | sed 's/^.*panaroo //' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,47 @@
# Module description for PANAROO_RUN: tool metadata plus the documented
# input and output channels of the process.
name: "panaroo_run"
description: A fast and scalable tool for bacterial pangenome analysis
keywords:
  - gff
  - pan-genome
  - alignment
tools:
  - "panaroo":
      description: "panaroo - an updated pipeline for pangenome investigation"
      homepage: "https://gtonkinhill.github.io/panaroo/#/"
      documentation: "https://gtonkinhill.github.io/panaroo/#/gettingstarted/quickstart"
      tool_dev_url: "https://github.com/gtonkinhill/panaroo"
      doi: "10.1186/s13059-020-02090-4"
      licence: "['MIT']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - gff:
      type: file
      description: A set of GFF3 formatted files
      pattern: "*.{gff}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - results:
      type: directory
      description: Directory containing Panaroo result files
      pattern: "*/*"
  # The process emits results/core_gene_alignment.aln on this channel, so the
  # pattern uses the .aln extension.
  - aln:
      type: file
      description: Core-genome alignment produced by Panaroo (Optional)
      pattern: "*.{aln}"

authors:
  - "@rpetit3"

View file

@ -2,10 +2,10 @@ process PIRATE {
tag "$meta.id" tag "$meta.id"
label 'process_medium' label 'process_medium'
conda (params.enable_conda ? "bioconda::pirate=1.0.4" : null) conda (params.enable_conda ? "bioconda::pirate=1.0.4 bioconda::perl-bioperl=1.7.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_1' : 'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_2' :
'quay.io/biocontainers/pirate:1.0.4--hdfd78af_1' }" 'quay.io/biocontainers/pirate:1.0.4--hdfd78af_2' }"
input: input:
tuple val(meta), path(gff) tuple val(meta), path(gff)

View file

@ -1248,6 +1248,10 @@ pairtools/sort:
- modules/pairtools/sort/** - modules/pairtools/sort/**
- tests/modules/pairtools/sort/** - tests/modules/pairtools/sort/**
panaroo/run:
- modules/panaroo/run/**
- tests/modules/panaroo/run/**
pangolin: pangolin:
- modules/pangolin/** - modules/pangolin/**
- tests/modules/pangolin/** - tests/modules/pangolin/**

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PANAROO_RUN } from '../../../../modules/panaroo/run/main.nf'
// Test entry point: runs PANAROO_RUN once on three GFF annotations.
workflow test_panaroo_run {

    // Sample metadata handed to the process together with the annotations.
    meta = [ id:'test', single_end:false ]

    // GFF annotations taken from the candidatus_portiera_aleyrodidarum
    // genome entries of params.test_data; each file must exist.
    gffs = [
        file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true),
        file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true),
        file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true)
    ]

    // Single [ meta, files ] tuple, matching the process input declaration.
    PANAROO_RUN ( [ meta, gffs ] )
}

View file

@ -0,0 +1,8 @@
process {

    // Publish into <outdir>/<tool>: <tool> is the last ':'-separated segment
    // of the process name, cut at its first '_' and lower-cased.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolName   = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolName}"
    }

    // Extra Panaroo options used by the module test.
    withName: PANAROO_RUN {
        ext.args = '--clean-mode strict -a core'
    }

}

View file

@ -0,0 +1,38 @@
# Expected outputs of the PANAROO_RUN module test. Each listed file is
# checked either by md5sum or by required substrings ("contains").
- name: panaroo run test_panaroo_run
  command: nextflow run tests/modules/panaroo/run -entry test_panaroo_run -c tests/config/nextflow.config
  tags:
    - panaroo/run
    - panaroo
  files:
    - path: output/panaroo/results/combined_DNA_CDS.fasta
      md5sum: 92bd4f6b9c84c84c0989d77d2558b7d1
    - path: output/panaroo/results/combined_protein_CDS.fasta
      md5sum: 7a5636c4f2abc190f5dd4dfbb1da7034
    - path: output/panaroo/results/combined_protein_cdhit_out.txt
      md5sum: 2164b81606712b53af2198b4d50eed08
    - path: output/panaroo/results/combined_protein_cdhit_out.txt.clstr
      md5sum: aaa9fc2b797c932b6d9ff2df640113f1
    - path: output/panaroo/results/core_alignment_header.embl
      contains: ["DNA", "feature", "locus_tag"]
    - path: output/panaroo/results/core_gene_alignment.aln
      contains: ["test1", "test2", "test3"]
    - path: output/panaroo/results/final_graph.gml
      contains: ["graph", "isolateNames", "test3"]
    - path: output/panaroo/results/gene_data.csv
      md5sum: 45df2d26ac81754b858c0e4757e834b2
    - path: output/panaroo/results/gene_presence_absence.Rtab
      md5sum: 9efbed5df121dc0c4fbb58869b65c3e4
    - path: output/panaroo/results/gene_presence_absence.csv
      contains: ["Gene", "unique", "test3"]
    - path: output/panaroo/results/gene_presence_absence_roary.csv
      contains: ["Gene", "Max", "Fragment"]
    - path: output/panaroo/results/pan_genome_reference.fa
      md5sum: d0b03cdfaab8965d86cb1045db3c2d20
    - path: output/panaroo/results/pre_filt_graph.gml
      contains: ["graph", "node", "id"]
    - path: output/panaroo/results/struct_presence_absence.Rtab
      md5sum: 246017db1175f9b1683dfe60cb2e9b5b
    - path: output/panaroo/results/summary_statistics.txt
      md5sum: 9c73c416e1758bea06c67b4ec9fd0720
    # Assert the recorded tool version instead of a checksum, so the check
    # does not depend on the exact bytes of the version line.
    - path: output/panaroo/versions.yml
      contains: ["panaroo: 1.2.9"]

View file

@ -14,10 +14,6 @@ workflow test_pirate {
file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true) file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true)
] ]
] ]
// [ file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_000292685.gff", checkIfExists: true),
// file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_000298385.gff", checkIfExists: true),
// file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_002849995.gff", checkIfExists: true) ]
// ]
PIRATE ( input ) PIRATE ( input )
} }

View file

@ -16,35 +16,35 @@
- path: output/pirate/results/PIRATE.unique_alleles.tsv - path: output/pirate/results/PIRATE.unique_alleles.tsv
contains: ["allele_name"] contains: ["allele_name"]
- path: output/pirate/results/binary_presence_absence.fasta - path: output/pirate/results/binary_presence_absence.fasta
contains: ["GCF_000292685"] contains: ["test1"]
- path: output/pirate/results/binary_presence_absence.nwk - path: output/pirate/results/binary_presence_absence.nwk
md5sum: 5b5d86bf97d97de37bb9db514abb7762 md5sum: 1fcf7434911bbab39b74791259f1f989
- path: output/pirate/results/cluster_alleles.tab - path: output/pirate/results/cluster_alleles.tab
contains: ["g0001"] contains: ["g0001"]
- path: output/pirate/results/co-ords/GCF_000292685.co-ords.tab - path: output/pirate/results/co-ords/test1.co-ords.tab
md5sum: d5ca0f06ca7ea1f5486683d5859bc9b8 md5sum: f1e75c045b35bae260dadb1a2f000dfa
- path: output/pirate/results/co-ords/GCF_000298385.co-ords.tab - path: output/pirate/results/co-ords/test2.co-ords.tab
md5sum: a24d6048b3074242bb558c7fa27a8b03 md5sum: 953b0d2f1dfd4c3a6a6dd246c9174703
- path: output/pirate/results/co-ords/GCF_002849995.co-ords.tab - path: output/pirate/results/co-ords/test3.co-ords.tab
md5sum: 0c08228585f4fa95686e9b025e0fe9c1 md5sum: 61f2c52ef77dc9a97a200c57b89b7d69
- path: output/pirate/results/genome2loci.tab - path: output/pirate/results/genome2loci.tab
md5sum: bbcea5bfcdcafe14a9aa7261c8e931b8 md5sum: 2d9cdefd5b1a7f5e20b0a70a6e5fa588
- path: output/pirate/results/genome_list.txt - path: output/pirate/results/genome_list.txt
md5sum: 6534b1635c258ad92b829077addc1ff5 md5sum: c19ac63da7949b15179f42093cbf95b8
- path: output/pirate/results/link_clusters.log - path: output/pirate/results/link_clusters.log
contains: ["parsing paralog file"] contains: ["parsing paralog file"]
- path: output/pirate/results/loci_list.tab - path: output/pirate/results/loci_list.tab
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/loci_paralog_categories.tab - path: output/pirate/results/loci_paralog_categories.tab
md5sum: 6404d2a32526a398f42d7da768a389bd md5sum: f8c3d31f445c3cb66bd95ba77be0a7b8
- path: output/pirate/results/modified_gffs/GCF_000292685.gff - path: output/pirate/results/modified_gffs/test1.gff
md5sum: 2b73bda2f84dc634303dc90e641040ca md5sum: 5dc220e472165c3b693e1017f38fc0e3
- path: output/pirate/results/modified_gffs/GCF_000298385.gff - path: output/pirate/results/modified_gffs/test2.gff
md5sum: b1a9d6557d47e09249f08a7acdbbd618 md5sum: 975f5c7fb363caf1be9c53e5e237bb5d
- path: output/pirate/results/modified_gffs/GCF_002849995.gff - path: output/pirate/results/modified_gffs/test3.gff
md5sum: 68532fc9bb639e6d83c731a069f60cf8 md5sum: a4735a5ac75ce82aadba8a3de6cabc2e
- path: output/pirate/results/pan_sequences.fasta - path: output/pirate/results/pan_sequences.fasta
md5sum: ed835c77fdb20c36aa9d5208eb7ca0cb md5sum: 0492833706f0f969b49a3555c16ec40d
- path: output/pirate/results/pangenome.connected_blocks.tsv - path: output/pirate/results/pangenome.connected_blocks.tsv
contains: ["block_number"] contains: ["block_number"]
- path: output/pirate/results/pangenome.edges - path: output/pirate/results/pangenome.edges
@ -59,29 +59,29 @@
contains: ["g0091"] contains: ["g0091"]
- path: output/pirate/results/pangenome.temp - path: output/pirate/results/pangenome.temp
- path: output/pirate/results/pangenome_iterations/pan_sequences.50.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.50.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.60.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.60.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.70.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.70.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.80.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.80.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.90.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.90.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.95.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.95.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.98.reclustered.reinflated - path: output/pirate/results/pangenome_iterations/pan_sequences.98.reclustered.reinflated
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.blast.output - path: output/pirate/results/pangenome_iterations/pan_sequences.blast.output
md5sum: 9da25d27684bfcc5488987ab2d1fd3a1 md5sum: ffb3b5bf1ba24bf2bd0be677500139a2
- path: output/pirate/results/pangenome_iterations/pan_sequences.cdhit_clusters - path: output/pirate/results/pangenome_iterations/pan_sequences.cdhit_clusters
contains: ["GCF_000298385_00081"] contains: ["test1_00081"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.core_clusters.tab - path: output/pirate/results/pangenome_iterations/pan_sequences.core_clusters.tab
contains: ["GCF_000298385_00242"] contains: ["test1_00242"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.mcl_log.txt - path: output/pirate/results/pangenome_iterations/pan_sequences.mcl_log.txt
contains: ["chaos"] contains: ["chaos"]
- path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta
md5sum: 84668b6c65b57026a17a50b0edd02541 md5sum: 6e483c773ed90bd50b33f2bd569343e4
- path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pdb - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pdb
- path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pot - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pot
- path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.ptf - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.ptf
@ -95,4 +95,4 @@
- path: output/pirate/results/representative_sequences.ffn - path: output/pirate/results/representative_sequences.ffn
contains: ["representative_genome"] contains: ["representative_genome"]
- path: output/pirate/results/split_groups.log - path: output/pirate/results/split_groups.log
contains: ["g0213"] contains: ["g0"]