Add: ANGSD_DOCOUNTS (#2044)

* Update main.nf

* Update meta.yml

* Re-add logos as not staged in a way that works with MultiQC config files

* Add ANGSD doCounts

* Prettier

* Update main.nf

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* Try loosening test due to conda

* Remove md5 for binary file

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
This commit is contained in:
James A. Fellows Yates 2022-09-27 13:21:44 +02:00 committed by GitHub
parent 82501fe6d0
commit 1780cf9bcf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 158 additions and 0 deletions

View file

@ -0,0 +1,45 @@
// Runs `angsd -doCounts 1` over one or more alignment files.
//
// Inputs (one tuple):
//   meta     - Groovy map with sample information; meta.id is the default output prefix
//   bam      - one or more alignment files, passed to ANGSD through a file list
//   bai      - index file(s) for the alignments, staged next to them
//   minqfile - optional file forwarded as `-minQfile`; pass [] to omit it
//
// Every data output is optional because which files ANGSD writes depends on the
// extra options supplied through task.ext.args.
process ANGSD_DOCOUNTS {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::angsd=0.939" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/angsd:0.939--h468462d_0':
        'quay.io/biocontainers/angsd:0.939--h468462d_0' }"

    input:
    tuple val(meta), path(bam), path(bai), path(minqfile)

    output:
    tuple val(meta), path("*.depthSample"), optional: true, emit: depth_sample
    tuple val(meta), path("*.depthGlobal"), optional: true, emit: depth_global
    tuple val(meta), path("*.qs")         , optional: true, emit: qs
    tuple val(meta), path("*.pos.gz")     , optional: true, emit: pos
    tuple val(meta), path("*.counts.gz")  , optional: true, emit: counts
    tuple val(meta), path("*.icnts.gz")   , optional: true, emit: icounts
    path "versions.yml"                   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only add -minQfile when a file was actually supplied ([] evaluates to false)
    def minq = minqfile ? "-minQfile ${minqfile}" : ""
    """
    # One staged alignment file name per line. Writing the declared inputs (rather than
    # globbing *.bam and resolving symlinks) also covers CRAM input and keeps each
    # alignment next to the index staged in this task directory.
    printf '%s\\n' ${bam} > bamlist.txt

    angsd \\
        -nThreads ${task.cpus} \\
        -doCounts 1 \\
        $args \\
        -bam bamlist.txt \\
        -out ${prefix} \\
        $minq

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        angsd: \$(echo \$(angsd 2>&1) | grep version | head -n 1 | sed 's/.*version: //g;s/ .*//g')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,72 @@
# Module metadata for ANGSD_DOCOUNTS: describes the tool, the input tuple
# elements and every output channel emitted by the process.
name: "angsd_docounts"
description: Calculates base frequency statistics across reference positions from BAM.
keywords:
  - angsd
  - population genetics
  - allele counts
  - doCounts
tools:
  - "angsd":
      description: "ANGSD: Analysis of next generation Sequencing Data"
      homepage: "http://www.popgen.dk/angsd/"
      documentation: "http://www.popgen.dk/angsd/"
      tool_dev_url: "https://github.com/ANGSD/angsd"
      doi: "10.1186/s12859-014-0356-4"
      # A real YAML sequence with one entry per licence, not a string that looks like a list
      licence: ["GPL v3", "MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: A list of BAM or CRAM files
      pattern: "*.{bam,cram}"
  - bai:
      type: file
      description: List of BAM/CRAM index files
      pattern: "*.{bai,csi,crai}"
  - minqfile:
      type: file
      description: File with individual quality score thresholds
      pattern: "*"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - depth_sample:
      type: file
      description: Distribution of sequencing depths per sample (one line per sample)
      pattern: "*.depthSample"
  - depth_global:
      type: file
      description: Distribution of sequencing depths across all samples combined
      pattern: "*.depthGlobal"
  - qs:
      type: file
      description: Distribution of scores
      pattern: "*.qs"
  - pos:
      type: file
      description: Various types of depth statistics (depending on value for -dumpCounts)
      pattern: "*.pos.gz"
  - counts:
      type: file
      description: Various types of statistics (related to pos.gz)
      pattern: "*.counts.gz"
  - icounts:
      type: file
      description: Internal format for dumping binary single chrs. Useful for ANGSD contamination
      pattern: "*.icnts.gz"

authors:
  - "@jfy133"

View file

@ -46,6 +46,10 @@ amrfinderplus/update:
- modules/amrfinderplus/update/**
- tests/modules/amrfinderplus/update/**
# Path globs for the angsd/docounts module: its sources and its test directory.
# NOTE(review): presumably used to pick which module tests run for a changed path — confirm.
angsd/docounts:
- modules/angsd/docounts/**
- tests/modules/angsd/docounts/**
antismash/antismashlite:
- modules/antismash/antismashlite/**
- tests/modules/antismash/antismashlite/**

View file

@ -0,0 +1,17 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { ANGSD_DOCOUNTS } from '../../../../modules/angsd/docounts/main.nf'
// Smoke test: runs ANGSD_DOCOUNTS on a single paired-end BAM and its index,
// without a per-individual minimum-quality file.
workflow test_angsd_docounts {

    def illumina = params.test_data['homo_sapiens']['illumina']

    def sample_meta = [ id:'test', single_end:false ]
    def alignment   = file(illumina['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true)
    def index       = file(illumina['test_paired_end_markduplicates_sorted_bam_bai'], checkIfExists: true)
    def no_minqfile = []   // empty placeholder for the optional minqfile element

    // Tuple order must match the process input: meta, bam, bai, minqfile
    ANGSD_DOCOUNTS ( [ sample_meta, alignment, index, no_minqfile ] )
}

View file

@ -0,0 +1,9 @@
// Test configuration for the ANGSD_DOCOUNTS module.
process {

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased text before the
    // first underscore of the unqualified process name (ANGSD_DOCOUNTS -> angsd).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    withName: ANGSD_DOCOUNTS {
        // ANGSD options are key/value pairs: give -doDepth an explicit value so the
        // following flag is not consumed as its argument.
        ext.args = "-iCounts 1 -doDepth 1 -dumpCounts 2"
    }

}

View file

@ -0,0 +1,11 @@
# Runs the test_angsd_docounts entry workflow and checks the published outputs.
- name: angsd docounts test_angsd_docounts
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/angsd/docounts
    -entry test_angsd_docounts
    -c ./tests/config/nextflow.config
    -c ./tests/modules/angsd/docounts/nextflow.config
  tags:
    - angsd/docounts
    - angsd
  files:
    - path: output/angsd/test.counts.gz
      contains:
        - "ind0TotDepth"
    # Only the presence of this file is checked; no content assertion is made.
    - path: output/angsd/test.icnts.gz
    - path: output/angsd/test.pos.gz
      contains:
        - "chr pos totDepth"