From f112e4d701fd56b0406206db020444471c581218 Mon Sep 17 00:00:00 2001
From: Katarzyna Kamieniecka
Date: Wed, 2 Feb 2022 16:38:41 +0000
Subject: [PATCH] bamcmp (#1244)

* New module added bamcmp

Co-authored-by: Mahesh Binzer-Panchal
Co-authored-by: Simon Pearce
---
 modules/bamcmp/main.nf               | 35 +++++++++++++++++
 modules/bamcmp/meta.yml              | 57 ++++++++++++++++++++++++++++
 tests/config/pytest_modules.yml      |  4 ++
 tests/modules/bamcmp/main.nf         | 36 ++++++++++++++++++
 tests/modules/bamcmp/nextflow.config | 27 +++++++++++++
 tests/modules/bamcmp/test.yml        | 11 ++++++
 6 files changed, 170 insertions(+)
 create mode 100644 modules/bamcmp/main.nf
 create mode 100644 modules/bamcmp/meta.yml
 create mode 100644 tests/modules/bamcmp/main.nf
 create mode 100644 tests/modules/bamcmp/nextflow.config
 create mode 100644 tests/modules/bamcmp/test.yml

diff --git a/modules/bamcmp/main.nf b/modules/bamcmp/main.nf
new file mode 100644
index 00000000..54416323
--- /dev/null
+++ b/modules/bamcmp/main.nf
@@ -0,0 +1,35 @@
+def VERSION = '2.2' // Written verbatim into versions.yml below; keep in sync with the conda and container pins.
+process BAMCMP { // Splits reads between a primary-genome alignment and a contaminant-genome alignment with bamcmp.
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::bamcmp=2.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bamcmp:2.2--h05f6578_0' :
+        'quay.io/biocontainers/bamcmp:2.2--h05f6578_0' }"
+
+    input:
+    tuple val(meta), path(sample), path(contaminant) // sample is passed to -1, contaminant to -2
+
+    output:
+    tuple val(meta), path("*primary.bam")      , emit: bam               // file written through -A
+    tuple val(meta), path("*contamination.bam"), emit: contamination_bam // file written through -B
+    path "versions.yml"                        , emit: versions
+
+    script:
+    def args = task.ext.args ?: '' // extra bamcmp options from the config; empty when unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to the sample id
+    """
+    bamcmp \\
+        -1 $sample \\
+        -2 $contaminant \\
+        -A ${prefix}_primary.bam \\
+        -B ${prefix}_contamination.bam \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bamcmp: $VERSION
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/bamcmp/meta.yml b/modules/bamcmp/meta.yml
new file mode 100644
index 00000000..480d8609
--- /dev/null
+++ b/modules/bamcmp/meta.yml
@@ -0,0 +1,57 @@
+name: bamcmp
+description: Bamcmp (Bam Compare) is a tool for assigning reads between a primary genome and a contamination genome. For instance, filtering out mouse reads from patient derived xenograft mouse models (PDX).
+keywords:
+  - filter
+  - xenograft
+  - host
+  - graft
+  - contamination
+  - mouse
+
+tools:
+  - bamcmp:
+      description: Bamcmp is a tool for deconvolving host and graft reads, using two bam files. Reads should be mapped to two genomes, and the mapped,
+        sorted bam files supplied to the tool. It is highly recommended to use the "-s as" option not the "-s mapq" option, else
+        reads which multimap to the contamination genome will be spuriously kept.
+      homepage: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      documentation: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      tool_dev_url: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      doi: "10.1158/1541-7786.MCR-16-0431"
+      licence: ['GPL v3']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+
+  - sample:
+      type: file
+      description: BAM/CRAM/SAM file with the reads aligned to the primary genome (the one you want to keep)
+      pattern: "*.{bam,cram,sam}"
+
+  - contaminant:
+      type: file
+      description: BAM/CRAM/SAM file with the reads aligned to the contaminant genome (the one you want to filter out)
+      pattern: "*.{bam,cram,sam}"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+  - bam:
+      type: file
+      description: Bam file containing the reads which align better to the primary genome.
+      pattern: "*.{bam,cram,sam}"
+
+  - contamination_bam: # key matches the process output "emit: contamination_bam"
+      type: file
+      description: Bam file containing the reads which align better to the contaminant genome.
+      pattern: "*.{bam,cram,sam}"
+
+authors:
+  - "@kmurat1"
+  - "@sppearce"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 588dfd02..8b3ff3e0 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -46,6 +46,10 @@ bamaligncleaner:
   - modules/bamaligncleaner/**
   - tests/modules/bamaligncleaner/**
 
+bamcmp:
+  - modules/bamcmp/**
+  - tests/modules/bamcmp/**
+
 bamtools/convert:
   - modules/bamtools/convert/**
   - tests/modules/bamtools/convert/**
diff --git a/tests/modules/bamcmp/main.nf b/tests/modules/bamcmp/main.nf
new file mode 100644
index 00000000..cf796406
--- /dev/null
+++ b/tests/modules/bamcmp/main.nf
@@ -0,0 +1,36 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { BWA_INDEX } from '../../../modules/bwa/index/main.nf'
+include { BWA_MEM } from '../../../modules/bwa/mem/main.nf'
+include { BWA_INDEX as BWA_INDEX_COV2 } from '../../../modules/bwa/index/main.nf'
+include { BWA_MEM as BWA_MEM_COV2 } from '../../../modules/bwa/mem/main.nf'
+
+include { SAMTOOLS_SORT } from '../../../modules/samtools/sort/main.nf'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_COV2 } from '../../../modules/samtools/sort/main.nf'
+
+include { BAMCMP } from '../../../modules/bamcmp/main.nf'
+
+workflow test_bamcmp { // Aligns one FASTQ to two genomes, sorts both alignments, then runs BAMCMP on the pair.
+
+    input = [
+        [ id:'test'], // meta map
+        [ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+    ]
+
+    fasta1 = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) // used as the primary genome
+    fasta2 = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) // used as the contaminant genome
+
+    BWA_INDEX ( fasta1 )
+    BWA_MEM ( input, BWA_INDEX.out.index, false )
+    SAMTOOLS_SORT (BWA_MEM.out.bam)
+
+
+    BWA_INDEX_COV2 ( fasta2 )
+    BWA_MEM_COV2 ( input, BWA_INDEX_COV2.out.index, false )
+    SAMTOOLS_SORT_COV2 (BWA_MEM_COV2.out.bam)
+
+    BAMCMP (SAMTOOLS_SORT.out.bam.join(SAMTOOLS_SORT_COV2.out.bam, by: [0])) // join on element 0 (the meta map) to get [meta, sample, contaminant]
+
+}
diff --git a/tests/modules/bamcmp/nextflow.config b/tests/modules/bamcmp/nextflow.config
new file mode 100644
index 00000000..d3c4858e
--- /dev/null
+++ b/tests/modules/bamcmp/nextflow.config
@@ -0,0 +1,27 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // e.g. BWA_MEM_COV2 -> "bwa", BAMCMP -> "bamcmp"
+
+    withName: BWA_MEM {
+        ext.prefix = "human"
+    }
+
+    withName: BWA_MEM_COV2 {
+        ext.prefix = "cov2"
+    }
+
+    withName: SAMTOOLS_SORT {
+        ext.args = '-n' // samtools sort -n: order by read name
+        ext.prefix = "sorted"
+    }
+
+    withName: SAMTOOLS_SORT_COV2 {
+        ext.args = '-n' // samtools sort -n: order by read name
+        ext.prefix = "sorted_cov2"
+    }
+
+    withName: BAMCMP {
+        ext.args = '-s "as"' // the "-s as" option that the module's meta.yml recommends over "-s mapq"
+    }
+
+}
diff --git a/tests/modules/bamcmp/test.yml b/tests/modules/bamcmp/test.yml
new file mode 100644
index 00000000..58fba458
--- /dev/null
+++ b/tests/modules/bamcmp/test.yml
@@ -0,0 +1,11 @@
+- name: bamcmp test_bamcmp
+  command: nextflow run tests/modules/bamcmp -entry test_bamcmp -c tests/config/nextflow.config
+  tags:
+    - bamcmp
+  files:
+    - path: output/bamcmp/test_contamination.bam
+      md5sum: 1fe730936d489c637479c1e51dd8ca55
+    - path: output/bamcmp/test_primary.bam
+      md5sum: 80b9abd8ef83e63548a9b8b82be2a034
+    - path: output/bamcmp/versions.yml
+      md5sum: 34d569665ff0459e84114e966dd3483b