Instrain profile (#1822)

* add instrain profile

* module instrain/profile

* add instrain profile

* module instrain/profile

* instrain profile

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* update

* linting

* Apply suggestions from code review

* Update modules/instrain/profile/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Matt Olm 2022-06-30 09:44:41 -07:00 committed by GitHub
parent d6102dab6d
commit 031fbd37aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 134 additions and 0 deletions

View file

@ -0,0 +1,44 @@
// Runs `inStrain profile` on one sample's alignment against the reference it was
// mapped to, and emits the resulting <prefix>.IS profile directory.
process INSTRAIN_PROFILE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::instrain=1.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/instrain:1.6.1--pyhdfd78af_0':
        'quay.io/biocontainers/instrain:1.6.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(bam)
    path genome_fasta
    path genes_fasta
    path stb_file

    output:
    tuple val(meta), path("*.IS"), emit: profile
    path "versions.yml"          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The genes FASTA and scaffold-to-bin file are optional: their flags are only
    // added to the command line when a (truthy) value was supplied.
    def gene_opt = ''
    if (genes_fasta) {
        gene_opt = "-g ${genes_fasta}"
    }
    def stb_opt = ''
    if (stb_file) {
        stb_opt = "-s ${stb_file}"
    }
    """
    inStrain \\
        profile \\
        $bam \\
        $genome_fasta \\
        -o ${prefix}.IS \\
        -p $task.cpus \\
        $gene_opt \\
        $stb_opt \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        instrain: \$(echo \$(inStrain profile --version 2>&1) | awk 'NF{ print \$NF }')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,54 @@
# Module metadata for instrain/profile: describes the tool, the process inputs
# (in the order the process declares them) and its emitted outputs.
name: "instrain_profile"
description: >-
  inStrain is a Python program for analysis of co-occurring genome populations
  from metagenomes that allows highly accurate genome comparisons, analysis of
  coverage, microdiversity, and linkage, and sensitive SNP detection with gene
  localization and synonymous non-synonymous identification
keywords:
  - instrain
  - metagenomics
  - population genomics
  - profile
tools:
  - instrain:
      description: Calculation of strain-level metrics
      homepage: https://github.com/MrOlm/instrain
      documentation: https://instrain.readthedocs.io/en/latest/
      tool_dev_url: https://github.com/MrOlm/instrain
      doi: 10.1038/s41587-020-00797-0
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: Groovy Map containing sample information e.g. [ id:'test']
  # File inputs use the `file` type; `path` is not a recognised meta.yml type.
  - bam:
      type: file
      description: Path to .bam file to be profiled
      pattern: "*.{bam,sam}"
  - genome_fasta:
      type: file
      description: Path to .fasta file to be profiled; MUST be the .fasta file that was mapped to to create the .bam file
      pattern: "*.{fasta,fna,fa}"
  - genes_fasta:
      type: file
      description: Path to .fna file of genes to be profiled (OPTIONAL)
      pattern: "*.{fasta,fna,fa}"
  - stb_file:
      type: file
      description: Path to .stb (scaffold to bin) file to be profiled (OPTIONAL)
      pattern: "*.stb"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The profile is a folder (<prefix>.IS), so it is typed as a directory.
  - profile:
      type: directory
      description: InStrain profile folder
      pattern: "*.IS/"
authors:
  - "@mrolm"

View file

@ -1113,6 +1113,10 @@ imputeme/vcftoprs:
- modules/imputeme/vcftoprs/**
- tests/modules/imputeme/vcftoprs/**
# Path globs registered under the instrain/profile key: the module's own files
# and the files of its test directory.
instrain/profile:
- modules/instrain/profile/**
- tests/modules/instrain/profile/**
iqtree:
- modules/iqtree/**
- tests/modules/iqtree/**

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { INSTRAIN_PROFILE } from '../../../../modules/instrain/profile/main.nf'
// Test entry point: profiles the SARS-CoV-2 paired-end sorted BAM fixture against
// the SARS-CoV-2 genome FASTA, without the optional genes / scaffold-to-bin inputs.
workflow test_instrain_profile {

    input = [
        // meta map; single_end is false because the BAM fixture below is paired-end
        [ id:'test', single_end:false ],
        [
            file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
        ]
    ]
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Empty lists are passed for genes_fasta and stb_file, so the module adds
    // neither the -g nor the -s flag.
    INSTRAIN_PROFILE ( input , genome_fasta , [] , [] )
}

View file

@ -0,0 +1,5 @@
// Process-scope settings used when running this module's test workflow.
process {
// Publish results under <params.outdir>/<tool>, where <tool> is derived from the
// process name: take the last ':'-separated component, keep the part before its
// first '_', and lower-case it (e.g. INSTRAIN_PROFILE -> instrain).
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,9 @@
# Test case for the instrain/profile module: runs the test_instrain_profile
# entry workflow and checks the published outputs.
- name: instrain
  command: >-
    nextflow run ./tests/modules/instrain/profile
    -entry test_instrain_profile
    -c ./tests/config/nextflow.config
    -c ./tests/modules/instrain/profile/nextflow.config
  tags:
    - instrain
    - instrain/profile
  files:
    # No checksum is recorded for versions.yml.
    - path: output/instrain/versions.yml
    # The SNV table is pinned by its md5 checksum.
    - path: output/instrain/test.IS/output/test.IS_SNVs.tsv
      md5sum: 1d753903af5a25be540dcff255e25a1f