From ab4c2e440d518b4815cc276cf621f89df4ffaa70 Mon Sep 17 00:00:00 2001
From: Priyanka Surana <86618929+priyanka-surana@users.noreply.github.com>
Date: Wed, 21 Sep 2022 14:12:59 +0100
Subject: [PATCH] samtools collate module (#2078)

* samtools collate module

* prettier fix

* prettier fix

* added cram functionality
---
Review notes (free text between the "---" marker and the first "diff --git"
line; it belongs neither to the commit message nor to any hunk):

* This patch was re-flowed from a whitespace-collapsed copy. Indentation inside
  the hunks was reconstructed, so re-check it before applying.
* modules/samtools/collate/main.nf: removed the unbalanced trailing ")" that
  followed the sed call in the versions.yml heredoc. It sat outside the
  "\$( ... )" substitution, so the shell emitted it literally after the
  samtools version string. The hunk line count is unchanged; the "index" blob
  id on that file still refers to the pre-fix content.
* SAMTOOLS_COLLATE picks the output extension only from a literal
  "--output-fmt sam", "--output-fmt bam" or "--output-fmt cram" found in
  task.ext.args; any other args value produces "<prefix>.bam". This is why the
  CRAM-input test below still expects output/samtools/test.bam.
* The process aborts with an error when the staged input name equals
  "<prefix>.<extension>", so callers must set task.ext.prefix in that case.

 modules/samtools/collate/main.nf              | 46 +++++++++++++++++++
 modules/samtools/collate/meta.yml             | 44 ++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/samtools/collate/main.nf        | 27 +++++++++++
 .../modules/samtools/collate/nextflow.config  |  5 ++
 tests/modules/samtools/collate/test.yml       | 17 +++++++
 6 files changed, 143 insertions(+)
 create mode 100644 modules/samtools/collate/main.nf
 create mode 100644 modules/samtools/collate/meta.yml
 create mode 100644 tests/modules/samtools/collate/main.nf
 create mode 100644 tests/modules/samtools/collate/nextflow.config
 create mode 100644 tests/modules/samtools/collate/test.yml

diff --git a/modules/samtools/collate/main.nf b/modules/samtools/collate/main.nf
new file mode 100644
index 00000000..4944b70a
--- /dev/null
+++ b/modules/samtools/collate/main.nf
@@ -0,0 +1,46 @@
+process SAMTOOLS_COLLATE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0':
+        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
+
+    input:
+    tuple val(meta), path(input)
+    path fasta
+
+    output:
+    tuple val(meta), path("*.bam"),  emit: bam,  optional: true
+    tuple val(meta), path("*.cram"), emit: cram, optional: true
+    tuple val(meta), path("*.sam"),  emit: sam,  optional: true
+    path "versions.yml",             emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def reference = fasta ? "--reference ${fasta}" : ""
+    def extension = args.contains("--output-fmt sam") ? "sam" :
+                    args.contains("--output-fmt bam") ? "bam" :
+                    args.contains("--output-fmt cram") ? "cram" :
+                    "bam"
+    if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    samtools \\
+        collate \\
+        $args \\
+        ${reference} \\
+        -@ $task.cpus \\
+        -o ${prefix}.${extension} \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/samtools/collate/meta.yml b/modules/samtools/collate/meta.yml
new file mode 100644
index 00000000..0e78403c
--- /dev/null
+++ b/modules/samtools/collate/meta.yml
@@ -0,0 +1,44 @@
+name: "samtools_collate"
+description: shuffles and groups reads together by their names
+keywords:
+  - collate
+  - bam
+tools:
+  - "samtools":
+      description: "Tools for dealing with SAM, BAM and CRAM files"
+      homepage: "http://www.htslib.org"
+      documentation: "https://www.htslib.org/doc/samtools-collate.html"
+      tool_dev_url: "https://github.com/samtools/samtools"
+      doi: "10.1093/bioinformatics/btp352"
+      licence: "['MIT']"
+
+input:
+  # Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+output:
+  #Only when we have meta
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - output:
+      type: file
+      description: Collated BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+authors:
+  - "@priyanka-surana"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 6b4ad00c..5fa0cb9f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2035,6 +2035,10 @@ samtools/bam2fq:
   - modules/samtools/bam2fq/**
   - tests/modules/samtools/bam2fq/**
 
+samtools/collate:
+  - modules/samtools/collate/**
+  - tests/modules/samtools/collate/**
+
 samtools/collatefastq:
   - modules/samtools/collatefastq/**
   - tests/modules/samtools/collatefastq/**
diff --git a/tests/modules/samtools/collate/main.nf b/tests/modules/samtools/collate/main.nf
new file mode 100644
index 00000000..5fec4913
--- /dev/null
+++ b/tests/modules/samtools/collate/main.nf
@@ -0,0 +1,27 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SAMTOOLS_COLLATE } from '../../../../modules/samtools/collate/main.nf'
+
+workflow test_samtools_collate {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
+    ]
+
+    SAMTOOLS_COLLATE ( input, [] )
+}
+
+workflow test_samtools_collate_cram {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)
+    ]
+
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+
+    SAMTOOLS_COLLATE ( input, fasta )
+}
diff --git a/tests/modules/samtools/collate/nextflow.config b/tests/modules/samtools/collate/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/samtools/collate/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/samtools/collate/test.yml b/tests/modules/samtools/collate/test.yml
new file mode 100644
index 00000000..784ede89
--- /dev/null
+++ b/tests/modules/samtools/collate/test.yml
@@ -0,0 +1,17 @@
+- name: samtools collate test_samtools_collate
+  command: nextflow run ./tests/modules/samtools/collate -entry test_samtools_collate -c ./tests/config/nextflow.config -c ./tests/modules/samtools/collate/nextflow.config
+  tags:
+    - samtools/collate
+    - samtools
+  files:
+    - path: output/samtools/test.bam
+      md5sum: dbb8244c28778a970e49577731b158df
+
+- name: samtools collate test_samtools_collate_cram
+  command: nextflow run ./tests/modules/samtools/collate -entry test_samtools_collate_cram -c ./tests/config/nextflow.config -c ./tests/modules/samtools/collate/nextflow.config
+  tags:
+    - samtools/collate
+    - samtools
+  files:
+    - path: output/samtools/test.bam
+      md5sum: 81aa61ed1f6f3bf6b9c94f70cbf2a177