samtools markdup module (#2087)

This commit is contained in:
Priyanka Surana 2022-09-21 15:39:10 +01:00 committed by GitHub
parent 41dfa13929
commit 22e144dc63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 143 additions and 0 deletions

View file

@ -0,0 +1,47 @@
// Runs `samtools markdup` on a single alignment file and records the samtools
// version that was used in versions.yml.
process SAMTOOLS_MARKDUP {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    // Singularity image is used only when the engine is singularity and the
    // "pull docker container" override is not set; otherwise the biocontainer.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0':
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    // meta  : sample map; meta.id is used for the tag and the default prefix
    // input : alignment file handed to `samtools markdup`
    tuple val(meta), path(input)
    // fasta : reference passed as `--reference`; a falsy value (e.g. []) omits the flag
    path fasta

    output:
    // Only the channel matching the chosen output extension receives a file,
    // hence every alignment output is optional.
    tuple val(meta), path("*.bam"),  emit: bam,  optional: true
    tuple val(meta), path("*.cram"), emit: cram, optional: true
    tuple val(meta), path("*.sam"),  emit: sam,  optional: true
    path "versions.yml",             emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def reference = fasta ? "--reference ${fasta}" : ""
    // The output extension follows an explicit `--output-fmt <fmt>` in args and
    // falls back to bam when none of the three recognised forms is present.
    def extension = args.contains("--output-fmt sam") ? "sam" :
                    args.contains("--output-fmt bam") ? "bam" :
                    args.contains("--output-fmt cram") ? "cram" :
                    "bam"
    // Refuse to run when the output name would be identical to the staged input name.
    if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    // NOTE: the version line must end with exactly one ')' closing the outer
    // command substitution; any extra ')' is written literally into versions.yml.
    """
    samtools \\
        markdup \\
        $args \\
        ${reference} \\
        -@ $task.cpus \\
        -T $prefix \\
        $input \\
        ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,41 @@
# Module metadata for SAMTOOLS_MARKDUP. The input and output entries mirror the
# process declaration: inputs (meta, input), fasta; emits bam, cram, sam, versions.
name: "samtools_markdup"
description: mark duplicate alignments in a coordinate sorted file
keywords:
  - markdup
tools:
  - "samtools":
      description: "Tools for dealing with SAM, BAM and CRAM files"
      homepage: "http://www.htslib.org"
      documentation: "https://www.htslib.org/doc/samtools-markdup.html"
      tool_dev_url: "https://github.com/samtools/samtools"
      doi: "10.1093/bioinformatics/btp352"
      # A real list, not the string "['MIT']".
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - fasta:
      type: file
      description: "Reference genome file passed as --reference (optional, pass [] to omit)"
      pattern: "*.{fa,fasta}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Duplicate-marked BAM file (default output format)
      pattern: "*.bam"
  - cram:
      type: file
      description: Duplicate-marked CRAM file (when args contain --output-fmt cram)
      pattern: "*.cram"
  - sam:
      type: file
      description: Duplicate-marked SAM file (when args contain --output-fmt sam)
      pattern: "*.sam"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@priyanka-surana"

View file

@ -2079,6 +2079,10 @@ samtools/index:
- modules/samtools/index/**
- tests/modules/samtools/index/**
# Path globs registered for the samtools/markdup tag: the module directory and
# its tests directory.
# NOTE(review): presumably used to decide which module tests run when files
# change — confirm against the CI configuration that reads this file.
samtools/markdup:
- modules/samtools/markdup/**
- tests/modules/samtools/markdup/**
samtools/merge:
- modules/samtools/merge/**
- tests/modules/samtools/merge/**

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMTOOLS_COLLATE } from '../../../../modules/samtools/collate/main.nf'
include { SAMTOOLS_FIXMATE } from '../../../../modules/samtools/fixmate/main.nf'
include { SAMTOOLS_SORT } from '../../../../modules/samtools/sort/main.nf'
include { SAMTOOLS_MARKDUP } from '../../../../modules/samtools/markdup/main.nf'
// Test entry point: feeds the sarscov2 paired-end test BAM through
// collate -> fixmate -> sort and runs SAMTOOLS_MARKDUP on the sorted result.
workflow test_samtools_markdup {

    def sample_meta = [ id:'test', single_end:false ] // meta map
    def paired_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
    def bam_input   = [ sample_meta, paired_bam ]

    SAMTOOLS_COLLATE ( bam_input, [] )

    def collated_bam = SAMTOOLS_COLLATE.out.bam
    SAMTOOLS_FIXMATE ( collated_bam )

    def fixmated_bam = SAMTOOLS_FIXMATE.out.bam
    SAMTOOLS_SORT ( fixmated_bam )

    // The empty list fills the `fasta` input, so no --reference is passed.
    def sorted_bam = SAMTOOLS_SORT.out.bam
    SAMTOOLS_MARKDUP ( sorted_bam, [] )
}

View file

@ -0,0 +1,21 @@
// Test configuration for the samtools/markdup workflow.
process {

    // Publish results under <outdir>/<tool>, where <tool> is the lower-cased
    // text before the first underscore of the unqualified process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // Each step gets its own prefix so that successive outputs carry distinct
    // file names derived from the same meta.id.
    withName: 'SAMTOOLS_COLLATE' {
        ext.prefix = { "${meta.id}.collate" }
    }

    withName: 'SAMTOOLS_FIXMATE' {
        ext.prefix = { "${meta.id}.fixmate" }
    }

    withName: 'SAMTOOLS_SORT' {
        ext.prefix = { "${meta.id}.sorted" }
    }

    withName: 'SAMTOOLS_MARKDUP' {
        ext.prefix = { "${meta.id}.markdup" }
    }

}

View file

@ -0,0 +1,8 @@
# Test case: runs the test_samtools_markdup entry workflow with the shared and
# module-specific configs, then checks the published BAM by checksum.
- name: samtools markdup test_samtools_markdup
  command: nextflow run ./tests/modules/samtools/markdup -entry test_samtools_markdup -c ./tests/config/nextflow.config -c ./tests/modules/samtools/markdup/nextflow.config
  tags:
    - samtools/markdup
    - samtools
  files:
    # File name follows the "<meta.id>.markdup" prefix with the default bam extension.
    - path: output/samtools/test.markdup.bam
      md5sum: 75f35284e20970d279a0baac5bbf8ee0