diff --git a/modules/panaroo/run/main.nf b/modules/panaroo/run/main.nf
new file mode 100644
index 00000000..d8be0c1e
--- /dev/null
+++ b/modules/panaroo/run/main.nf
@@ -0,0 +1,37 @@
+// Runs `panaroo` on the staged GFF files and emits everything it writes under `results/`.
+process PANAROO_RUN {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::panaroo=1.2.9" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/panaroo:1.2.9--pyhdfd78af_0':
+        'quay.io/biocontainers/panaroo:1.2.9--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(gff)
+
+    output:
+    tuple val(meta), path("results/*")                      , emit: results
+    // Declared optional so the task does not fail when no core alignment file is written.
+    tuple val(meta), path("results/core_gene_alignment.aln"), optional: true, emit: aln
+    path "versions.yml"                                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    panaroo \\
+        $args \\
+        -t $task.cpus \\
+        -o results \\
+        -i $gff
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        panaroo: \$(echo \$(panaroo --version 2>&1) | sed 's/^.*panaroo //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/panaroo/run/meta.yml b/modules/panaroo/run/meta.yml
new file mode 100644
index 00000000..eaeb41d0
--- /dev/null
+++ b/modules/panaroo/run/meta.yml
@@ -0,0 +1,47 @@
+name: "panaroo_run"
+description: A fast and scalable tool for bacterial pangenome analysis
+keywords:
+  - gff
+  - pan-genome
+  - alignment
+tools:
+  - "panaroo":
+      description: "panaroo - an updated pipeline for pangenome investigation"
+      homepage: "https://gtonkinhill.github.io/panaroo/#/"
+      documentation: "https://gtonkinhill.github.io/panaroo/#/gettingstarted/quickstart"
+      tool_dev_url: "https://github.com/gtonkinhill/panaroo"
+      doi: "10.1186/s13059-020-02090-4"
+      licence: "['MIT']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - gff:
+      type: file
+      description: A set of GFF3 formatted files
+      pattern: "*.{gff}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - results:
+      type: directory
+      description: Directory containing Panaroo result files
+      pattern: "*/*"
+  - aln:
+      type: file
+      description: Core-genome alignment produced by Panaroo (Optional)
+      pattern: "*.{aln}"
+
+authors:
+  - "@rpetit3"
diff --git a/modules/pirate/main.nf b/modules/pirate/main.nf
index 20544a1b..ebef5b37 100644
--- a/modules/pirate/main.nf
+++ b/modules/pirate/main.nf
@@ -2,10 +2,10 @@ process PIRATE {
     tag "$meta.id"
     label 'process_medium'
 
-    conda (params.enable_conda ? "bioconda::pirate=1.0.4" : null)
+    conda (params.enable_conda ? "bioconda::pirate=1.0.4 bioconda::perl-bioperl=1.7.2" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_1' :
-        'quay.io/biocontainers/pirate:1.0.4--hdfd78af_1' }"
+        'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_2' :
+        'quay.io/biocontainers/pirate:1.0.4--hdfd78af_2' }"
 
     input:
     tuple val(meta), path(gff)
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 2a93a846..13bcbbd3 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1248,6 +1248,10 @@ pairtools/sort:
   - modules/pairtools/sort/**
   - tests/modules/pairtools/sort/**
 
+panaroo/run:
+  - modules/panaroo/run/**
+  - tests/modules/panaroo/run/**
+
 pangolin:
   - modules/pangolin/**
   - tests/modules/pangolin/**
diff --git a/tests/modules/panaroo/run/main.nf b/tests/modules/panaroo/run/main.nf
new file mode 100644
index 00000000..1fab62ed
--- /dev/null
+++ b/tests/modules/panaroo/run/main.nf
@@ -0,0 +1,19 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PANAROO_RUN } from '../../../../modules/panaroo/run/main.nf'
+
+workflow test_panaroo_run {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [
+            file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true),
+            file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true),
+            file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true)
+        ]
+    ]
+
+    PANAROO_RUN ( input )
+}
diff --git a/tests/modules/panaroo/run/nextflow.config b/tests/modules/panaroo/run/nextflow.config
new file mode 100644
index 00000000..af23e716
--- /dev/null
+++ b/tests/modules/panaroo/run/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: PANAROO_RUN {
+        ext.args = '--clean-mode strict -a core'
+    }
+}
diff --git a/tests/modules/panaroo/run/test.yml b/tests/modules/panaroo/run/test.yml
new file mode 100644
index 00000000..20f0b814
--- /dev/null
+++ b/tests/modules/panaroo/run/test.yml
@@ -0,0 +1,39 @@
+- name: panaroo run test_panaroo_run
+  command: nextflow run tests/modules/panaroo/run -entry test_panaroo_run -c tests/config/nextflow.config
+  tags:
+    - panaroo/run
+    - panaroo
+  files:
+    - path: output/panaroo/results/combined_DNA_CDS.fasta
+      md5sum: 92bd4f6b9c84c84c0989d77d2558b7d1
+    - path: output/panaroo/results/combined_protein_CDS.fasta
+      md5sum: 7a5636c4f2abc190f5dd4dfbb1da7034
+    - path: output/panaroo/results/combined_protein_cdhit_out.txt
+      md5sum: 2164b81606712b53af2198b4d50eed08
+    - path: output/panaroo/results/combined_protein_cdhit_out.txt.clstr
+      md5sum: aaa9fc2b797c932b6d9ff2df640113f1
+    - path: output/panaroo/results/core_alignment_header.embl
+      contains: ["DNA", "feature", "locus_tag"]
+    - path: output/panaroo/results/core_gene_alignment.aln
+      contains: ["test1", "test2", "test3"]
+    - path: output/panaroo/results/final_graph.gml
+      contains: ["graph", "isolateNames", "test3"]
+    - path: output/panaroo/results/gene_data.csv
+      md5sum: 45df2d26ac81754b858c0e4757e834b2
+    - path: output/panaroo/results/gene_presence_absence.Rtab
+      md5sum: 9efbed5df121dc0c4fbb58869b65c3e4
+    - path: output/panaroo/results/gene_presence_absence.csv
+      contains: ["Gene", "unique", "test3"]
+    - path: output/panaroo/results/gene_presence_absence_roary.csv
+      contains: ["Gene", "Max", "Fragment"]
+    - path: output/panaroo/results/pan_genome_reference.fa
+      md5sum: d0b03cdfaab8965d86cb1045db3c2d20
+    - path: output/panaroo/results/pre_filt_graph.gml
+      contains: ["graph", "node", "id"]
+    - path: output/panaroo/results/struct_presence_absence.Rtab
+      md5sum: 246017db1175f9b1683dfe60cb2e9b5b
+    - path: output/panaroo/results/summary_statistics.txt
+      md5sum: 9c73c416e1758bea06c67b4ec9fd0720
+    - path: output/panaroo/versions.yml
+      contains: ["panaroo: 1.2.9"]
+      must_not_contain: ["1.2.9)"]
diff --git a/tests/modules/pirate/main.nf b/tests/modules/pirate/main.nf
index 05e5bdd8..45c60713 100644
--- a/tests/modules/pirate/main.nf
+++ b/tests/modules/pirate/main.nf
@@ -14,10 +14,6 @@ workflow test_pirate {
             file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true)
         ]
     ]
-    // [ file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_000292685.gff", checkIfExists: true),
-    //   file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_000298385.gff", checkIfExists: true),
-    //   file("https://github.com/bactopia/bactopia-tests/raw/main/data/reference/gff/GCF_002849995.gff", checkIfExists: true) ]
-    // ]
 
     PIRATE ( input )
 }
diff --git a/tests/modules/pirate/test.yml b/tests/modules/pirate/test.yml
index 6ef9e6e0..ffee5552 100644
--- a/tests/modules/pirate/test.yml
+++ b/tests/modules/pirate/test.yml
@@ -16,35 +16,35 @@
     - path: output/pirate/results/PIRATE.unique_alleles.tsv
       contains: ["allele_name"]
     - path: output/pirate/results/binary_presence_absence.fasta
-      contains: ["GCF_000292685"]
+      contains: ["test1"]
     - path: output/pirate/results/binary_presence_absence.nwk
-      md5sum: 5b5d86bf97d97de37bb9db514abb7762
+      md5sum: 1fcf7434911bbab39b74791259f1f989
     - path: output/pirate/results/cluster_alleles.tab
       contains: ["g0001"]
-    - path: output/pirate/results/co-ords/GCF_000292685.co-ords.tab
-      md5sum: d5ca0f06ca7ea1f5486683d5859bc9b8
-    - path: output/pirate/results/co-ords/GCF_000298385.co-ords.tab
-      md5sum: a24d6048b3074242bb558c7fa27a8b03
-    - path: output/pirate/results/co-ords/GCF_002849995.co-ords.tab
-      md5sum: 0c08228585f4fa95686e9b025e0fe9c1
+    - path: output/pirate/results/co-ords/test1.co-ords.tab
+      md5sum: f1e75c045b35bae260dadb1a2f000dfa
+    - path: output/pirate/results/co-ords/test2.co-ords.tab
+      md5sum: 953b0d2f1dfd4c3a6a6dd246c9174703
+    - path: output/pirate/results/co-ords/test3.co-ords.tab
+      md5sum: 61f2c52ef77dc9a97a200c57b89b7d69
     - path: output/pirate/results/genome2loci.tab
-      md5sum: bbcea5bfcdcafe14a9aa7261c8e931b8
+      md5sum: 2d9cdefd5b1a7f5e20b0a70a6e5fa588
     - path: output/pirate/results/genome_list.txt
-      md5sum: 6534b1635c258ad92b829077addc1ff5
+      md5sum: c19ac63da7949b15179f42093cbf95b8
     - path: output/pirate/results/link_clusters.log
       contains: ["parsing paralog file"]
     - path: output/pirate/results/loci_list.tab
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/loci_paralog_categories.tab
-      md5sum: 6404d2a32526a398f42d7da768a389bd
-    - path: output/pirate/results/modified_gffs/GCF_000292685.gff
-      md5sum: 2b73bda2f84dc634303dc90e641040ca
-    - path: output/pirate/results/modified_gffs/GCF_000298385.gff
-      md5sum: b1a9d6557d47e09249f08a7acdbbd618
-    - path: output/pirate/results/modified_gffs/GCF_002849995.gff
-      md5sum: 68532fc9bb639e6d83c731a069f60cf8
+      md5sum: f8c3d31f445c3cb66bd95ba77be0a7b8
+    - path: output/pirate/results/modified_gffs/test1.gff
+      md5sum: 5dc220e472165c3b693e1017f38fc0e3
+    - path: output/pirate/results/modified_gffs/test2.gff
+      md5sum: 975f5c7fb363caf1be9c53e5e237bb5d
+    - path: output/pirate/results/modified_gffs/test3.gff
+      md5sum: a4735a5ac75ce82aadba8a3de6cabc2e
     - path: output/pirate/results/pan_sequences.fasta
-      md5sum: ed835c77fdb20c36aa9d5208eb7ca0cb
+      md5sum: 0492833706f0f969b49a3555c16ec40d
     - path: output/pirate/results/pangenome.connected_blocks.tsv
       contains: ["block_number"]
     - path: output/pirate/results/pangenome.edges
@@ -59,29 +59,29 @@
       contains: ["g0091"]
     - path: output/pirate/results/pangenome.temp
     - path: output/pirate/results/pangenome_iterations/pan_sequences.50.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.60.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.70.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.80.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.90.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.95.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.98.reclustered.reinflated
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.blast.output
-      md5sum: 9da25d27684bfcc5488987ab2d1fd3a1
+      md5sum: ffb3b5bf1ba24bf2bd0be677500139a2
     - path: output/pirate/results/pangenome_iterations/pan_sequences.cdhit_clusters
-      contains: ["GCF_000298385_00081"]
+      contains: ["test1_00081"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.core_clusters.tab
-      contains: ["GCF_000298385_00242"]
+      contains: ["test1_00242"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.mcl_log.txt
       contains: ["chaos"]
     - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta
-      md5sum: 84668b6c65b57026a17a50b0edd02541
+      md5sum: 6e483c773ed90bd50b33f2bd569343e4
     - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pdb
     - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.pot
     - path: output/pirate/results/pangenome_iterations/pan_sequences.representative.fasta.ptf
@@ -95,4 +95,4 @@
     - path: output/pirate/results/representative_sequences.ffn
       contains: ["representative_genome"]
     - path: output/pirate/results/split_groups.log
-      contains: ["g0213"]
+      contains: ["g0"]