samtools collate module (#2078)

* samtools collate module

* prettier fix

* prettier fix

* added cram functionality
This commit is contained in:
Priyanka Surana 2022-09-21 14:12:59 +01:00 committed by GitHub
parent d5d785b3d8
commit ab4c2e440d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 143 additions and 0 deletions

View file

@ -0,0 +1,46 @@
// Runs `samtools collate` on a SAM/BAM/CRAM file so that reads sharing a name
// are grouped together, and records the samtools version in versions.yml.
//
// Inputs:
//   meta  - sample map; meta.id is used for the task tag and the default output prefix
//   input - alignment file handed to `samtools collate`
//   fasta - reference FASTA; when it evaluates to false (e.g. `[]`) no
//           `--reference` option is added to the command line
//
// Outputs (all optional, only the one matching the chosen format is produced):
//   bam / cram / sam - collated file named ${prefix}.${extension}
//   versions         - versions.yml with the samtools version
process SAMTOOLS_COLLATE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0':
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(input)
    path fasta

    output:
    tuple val(meta), path("*.bam"),  emit: bam,  optional: true
    tuple val(meta), path("*.cram"), emit: cram, optional: true
    tuple val(meta), path("*.sam"),  emit: sam,  optional: true
    path "versions.yml",             emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def reference = fasta ? "--reference ${fasta}" : ""
    // The output extension follows an explicit `--output-fmt <fmt>` in args and
    // falls back to BAM when none of the three known formats is requested.
    def extension = args.contains("--output-fmt sam") ? "sam" :
                    args.contains("--output-fmt bam") ? "bam" :
                    args.contains("--output-fmt cram") ? "cram" :
                    "bam"
    // Refuse to run when the output file would have the same name as the staged input.
    if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    samtools \\
        collate \\
        $args \\
        ${reference} \\
        -@ $task.cpus \\
        -o ${prefix}.${extension} \\
        $input

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,44 @@
# Module metadata for samtools/collate: describes the tool, the inputs the
# process takes and the channels it emits.
name: "samtools_collate"
description: shuffles and groups reads together by their names
keywords:
  - collate
  - bam
tools:
  - "samtools":
      description: "Tools for dealing with SAM, BAM and CRAM files"
      homepage: "http://www.htslib.org"
      documentation: "https://www.htslib.org/doc/samtools-collate.html"
      tool_dev_url: "https://github.com/samtools/samtools"
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - fasta:
      type: file
      description: |
        Reference FASTA passed to samtools as --reference.
        Supply an empty list ([]) to run without a reference.
      pattern: "*.{fa,fasta}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: |
        Collated BAM file; this is the default output format
      pattern: "*.bam"
  - cram:
      type: file
      description: |
        Collated CRAM file, produced when args contain '--output-fmt cram'
      pattern: "*.cram"
  - sam:
      type: file
      description: |
        Collated SAM file, produced when args contain '--output-fmt sam'
      pattern: "*.sam"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@priyanka-surana"

View file

@ -2035,6 +2035,10 @@ samtools/bam2fq:
- modules/samtools/bam2fq/**
- tests/modules/samtools/bam2fq/**
# Glob patterns covering the samtools/collate module sources and its tests
# (presumably used to decide when this module's tests run; confirm with the consumer of this file).
samtools/collate:
- modules/samtools/collate/**
- tests/modules/samtools/collate/**
# Glob patterns covering the samtools/collatefastq module sources and its tests
# (presumably used to decide when this module's tests run; confirm with the consumer of this file).
samtools/collatefastq:
- modules/samtools/collatefastq/**
- tests/modules/samtools/collatefastq/**

View file

@ -0,0 +1,27 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMTOOLS_COLLATE } from '../../../../modules/samtools/collate/main.nf'
// Runs SAMTOOLS_COLLATE on the SARS-CoV-2 paired-end BAM test file, with an
// empty list in place of the reference FASTA.
workflow test_samtools_collate {

    sample_meta = [ id:'test', single_end:false ] // meta map
    bam_file    = file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)

    SAMTOOLS_COLLATE ( [ sample_meta, bam_file ], [] )
}
// Runs SAMTOOLS_COLLATE on the human paired-end sorted CRAM test file and
// supplies the matching genome FASTA as the reference input.
workflow test_samtools_collate_cram {

    sample_meta  = [ id:'test', single_end:false ] // meta map
    cram_file    = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)
    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    SAMTOOLS_COLLATE ( [ sample_meta, cram_file ], genome_fasta )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish results under <outdir>/<tool>, where <tool> is derived from the
    // process name: take the last ':'-separated component, keep the part
    // before its first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,17 @@
# BAM input without a reference; expects a collated BAM named after meta.id.
- name: samtools collate test_samtools_collate
  command: >-
    nextflow run ./tests/modules/samtools/collate
    -entry test_samtools_collate
    -c ./tests/config/nextflow.config
    -c ./tests/modules/samtools/collate/nextflow.config
  tags:
    - samtools/collate
    - samtools
  files:
    - path: output/samtools/test.bam
      md5sum: dbb8244c28778a970e49577731b158df
# CRAM input with a reference FASTA; the expected output is still a BAM file.
- name: samtools collate test_samtools_collate_cram
  command: >-
    nextflow run ./tests/modules/samtools/collate
    -entry test_samtools_collate_cram
    -c ./tests/config/nextflow.config
    -c ./tests/modules/samtools/collate/nextflow.config
  tags:
    - samtools/collate
    - samtools
  files:
    - path: output/samtools/test.bam
      md5sum: 81aa61ed1f6f3bf6b9c94f70cbf2a177