From 031fbd37aaf2531f8460801afcd1264b19a874ba Mon Sep 17 00:00:00 2001
From: Matt Olm
Date: Thu, 30 Jun 2022 09:44:41 -0700
Subject: [PATCH] Instrain profile (#1822)

* add instrain profile

* module instrain/profile

* add instrain profile

* module instrain/profile

* instrain profile

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates

* update

* linting

* Apply suggestions from code review

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates
---
Notes (placed after the '---' separator, so not part of the commit message):
  Adds the INSTRAIN_PROFILE module (inStrain 1.6.1), its meta.yml, and a
  test workflow that profiles the sarscov2 paired-end sorted BAM against
  the sarscov2 genome FASTA, passing [] for the optional genes_fasta and
  stb_file inputs.
  The From: and Co-authored-by: lines carry no e-mail addresses in this
  copy; use `git apply`, or restore the addresses before `git am`.

 modules/instrain/profile/main.nf              | 44 +++++++++++++++
 modules/instrain/profile/meta.yml             | 54 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/instrain/profile/main.nf        | 18 +++++++
 .../modules/instrain/profile/nextflow.config  |  5 ++
 tests/modules/instrain/profile/test.yml       |  9 ++++
 6 files changed, 134 insertions(+)
 create mode 100644 modules/instrain/profile/main.nf
 create mode 100644 modules/instrain/profile/meta.yml
 create mode 100644 tests/modules/instrain/profile/main.nf
 create mode 100644 tests/modules/instrain/profile/nextflow.config
 create mode 100644 tests/modules/instrain/profile/test.yml

diff --git a/modules/instrain/profile/main.nf b/modules/instrain/profile/main.nf
new file mode 100644
index 00000000..74e4b3ab
--- /dev/null
+++ b/modules/instrain/profile/main.nf
@@ -0,0 +1,44 @@
+process INSTRAIN_PROFILE {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::instrain=1.6.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/instrain:1.6.1--pyhdfd78af_0':
+        'quay.io/biocontainers/instrain:1.6.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+    path genome_fasta
+    path genes_fasta
+    path stb_file
+
+    output:
+    tuple val(meta), path("*.IS")                   , emit: profile
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def genes_args = genes_fasta ? "-g ${genes_fasta}": ''
+    def stb_args = stb_file ? "-s ${stb_file}": ''
+    """
+    inStrain \\
+        profile \\
+        $bam \\
+        $genome_fasta \\
+        -o ${prefix}.IS \\
+        -p $task.cpus \\
+        $genes_args \\
+        $stb_args \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        instrain: \$(echo \$(inStrain profile --version 2>&1) | awk 'NF{ print \$NF }')
+    END_VERSIONS
+    """
+}
diff --git a/modules/instrain/profile/meta.yml b/modules/instrain/profile/meta.yml
new file mode 100644
index 00000000..efcf3ede
--- /dev/null
+++ b/modules/instrain/profile/meta.yml
@@ -0,0 +1,54 @@
+name: "instrain_profile"
+description: inStrain is python program for analysis of co-occurring genome populations from metagenomes that allows highly accurate genome comparisons, analysis of coverage, microdiversity, and linkage, and sensitive SNP detection with gene localization and synonymous non-synonymous identification
+keywords:
+  - instrain
+  - metagenomics
+  - population genomics
+  - profile
+tools:
+  - instrain:
+      description: Calculation of strain-level metrics
+      homepage: https://github.com/MrOlm/instrain
+      documentation: https://instrain.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/MrOlm/instrain
+      doi: 10.1038/s41587-020-00797-0
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information e.g. [ id:'test']
+  - bam:
+      type: path
+      description: Path to .bam file to be profiled
+      pattern: "*.{bam,sam}"
+  - genome_fasta:
+      type: path
+      description: Path to .fasta file to be profiled; MUST be the .fasta file that was mapped to to create the .bam file
+      pattern: "*.{fasta,fna,fa}"
+  - genes_fasta:
+      type: path
+      description: Path to .fna file of genes to be profiled (OPTIONAL)
+      pattern: "*.{fasta,fna,fa}"
+  - stb_file:
+      type: path
+      description: Path to .stb (scaffold to bin) file to be profiled (OPTIONAL)
+      pattern: "*.stb"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - profile:
+      type: path
+      description: InStrain profile folder
+      pattern: "*.IS/"
+
+authors:
+  - "@mrolm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 35a8c897..3eba2e8c 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1113,6 +1113,10 @@ imputeme/vcftoprs:
   - modules/imputeme/vcftoprs/**
   - tests/modules/imputeme/vcftoprs/**
 
+instrain/profile:
+  - modules/instrain/profile/**
+  - tests/modules/instrain/profile/**
+
 iqtree:
   - modules/iqtree/**
   - tests/modules/iqtree/**
diff --git a/tests/modules/instrain/profile/main.nf b/tests/modules/instrain/profile/main.nf
new file mode 100644
index 00000000..ce054725
--- /dev/null
+++ b/tests/modules/instrain/profile/main.nf
@@ -0,0 +1,18 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { INSTRAIN_PROFILE } from '../../../../modules/instrain/profile/main.nf'
+
+workflow test_instrain_profile {
+
+    input = [
+        [ id:'test', single_end:true ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+        ]
+    ]
+    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    INSTRAIN_PROFILE ( input , genome_fasta , [] , [] )
+}
diff --git a/tests/modules/instrain/profile/nextflow.config b/tests/modules/instrain/profile/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/instrain/profile/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/instrain/profile/test.yml b/tests/modules/instrain/profile/test.yml
new file mode 100644
index 00000000..c256d101
--- /dev/null
+++ b/tests/modules/instrain/profile/test.yml
@@ -0,0 +1,9 @@
+- name: "instrain"
+  command: nextflow run ./tests/modules/instrain/profile -entry test_instrain_profile -c ./tests/config/nextflow.config -c ./tests/modules/instrain/profile/nextflow.config
+  tags:
+    - instrain
+    - instrain/profile
+  files:
+    - path: output/instrain/versions.yml
+    - path: output/instrain/test.IS/output/test.IS_SNVs.tsv
+      md5sum: 1d753903af5a25be540dcff255e25a1f