bamcmp (#1244)

* New module added bamcmp Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se> Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
2024-12-22 02:58:17 +00:00 · 2022-02-02 16:38:41 +00:00 · 2022-02-02 16:38:41 +00:00 · f112e4d701
commit f112e4d701
parent 601c6e2023
6 changed files with 170 additions and 0 deletions
--- a/modules/bamcmp/main.nf
+++ b/modules/bamcmp/main.nf
@ -0,0 +1,35 @@
+// bamcmp version string; written verbatim into versions.yml by the process below.
+// NOTE(review): presumably hard-coded because the tool does not report its own
+// version on the command line -- confirm, and keep in sync with the conda/container pins.
+def VERSION = '2.2'
+// Runs bamcmp on two alignments of the same sample (one per genome) and emits
+// the two BAM files written through its -A and -B options.
+process BAMCMP {
+    label 'process_low'
+
+    // Software environment: a bioconda package when params.enable_conda is set;
+    // otherwise the Galaxy depot Singularity image when the container engine is
+    // singularity (and task.ext.singularity_pull_docker_container is not set),
+    // falling back to the quay.io biocontainers image.
+    conda (params.enable_conda ? "bioconda::bamcmp=2.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bamcmp:2.2--h05f6578_0' :
+        'quay.io/biocontainers/bamcmp:2.2--h05f6578_0' }"
+
+    input:
+    // meta        : sample metadata map; meta.id is the default output prefix
+    // sample      : alignment file passed to bamcmp as -1
+    // contaminant : alignment file passed to bamcmp as -2
+    tuple val(meta), path(sample), path(contaminant)
+
+    output:
+    // bam               : file written via -A (<prefix>_primary.bam)
+    // contamination_bam : file written via -B (<prefix>_contamination.bam)
+    // versions          : versions.yml recording the bamcmp version
+    tuple val(meta), path("*primary.bam")      , emit: bam
+    tuple val(meta), path("*contamination.bam"), emit: contamination_bam
+    path "versions.yml"                        , emit: versions
+
+    script:
+    // Extra command-line options and the output prefix can be overridden per
+    // process through task.ext.args / task.ext.prefix in the pipeline config.
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    bamcmp \\
+        -1 $sample \\
+        -2 $contaminant \\
+        -A ${prefix}_primary.bam \\
+        -B ${prefix}_contamination.bam \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bamcmp: $VERSION
+    END_VERSIONS
+    """
+
+}
--- a/modules/bamcmp/meta.yml
+++ b/modules/bamcmp/meta.yml
@ -0,0 +1,57 @@
+name: bamcmp
+description: Bamcmp (Bam Compare) is a tool for assigning reads between a primary genome and a contamination genome. For instance, filtering out mouse reads from patient derived xenograft mouse models (PDX).
+keywords:
+  - filter
+  - xenograft
+  - host
+  - graft
+  - contamination
+  - mouse
+
+tools:
+  - bamcmp:
+      description: Bamcmp is a tool for deconvolving host and graft reads, using two bam files. Reads should be mapped to two genomes, and the mapped,
+                    sorted bam files supplied to the tool. It is highly recommended to use the "-s as" option not the "-s mapq" option, else
+                    reads which multimap to the contamination genome will be spuriously kept.
+      homepage: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      documentation: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      tool_dev_url: https://github.com/CRUKMI-ComputationalBiology/bamcmp
+      doi: "10.1158/1541-7786.MCR-16-0431"
+      licence: ['GPL v3']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+
+  - sample:
+      type: file
+      description: BAM/CRAM/SAM file with the reads aligned to the primary genome (the one you want to keep)
+      pattern: "*.{bam,cram,sam}"
+
+  - contaminant:
+      type: file
+      description: BAM/CRAM/SAM file with the reads aligned to the contaminant genome (the one you want to filter out)
+      pattern: "*.{bam,cram,sam}"
+
+# Entry names mirror the `emit:` labels of the BAMCMP process outputs, and the
+# patterns mirror the globs the process uses to collect them.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+  - bam:
+      type: file
+      description: BAM file containing the reads which align better to the primary genome.
+      pattern: "*primary.bam"
+
+  - contamination_bam:
+      type: file
+      description: BAM file containing the reads which align better to the contaminant genome.
+      pattern: "*contamination.bam"
+
+authors:
+  - "@kmurat1"
+  - "@sppearce"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -46,6 +46,10 @@ bamaligncleaner:
  - modules/bamaligncleaner/**
  - tests/modules/bamaligncleaner/**

+bamcmp:
+  - modules/bamcmp/**
+  - tests/modules/bamcmp/**
+
 bamtools/convert:
  - modules/bamtools/convert/**
  - tests/modules/bamtools/convert/**
--- a/tests/modules/bamcmp/main.nf
+++ b/tests/modules/bamcmp/main.nf
@ -0,0 +1,36 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { BWA_INDEX } from '../../../modules/bwa/index/main.nf'
+include { BWA_MEM   } from '../../../modules/bwa/mem/main.nf'
+include { BWA_INDEX as BWA_INDEX_COV2 } from '../../../modules/bwa/index/main.nf'
+include { BWA_MEM as BWA_MEM_COV2   } from '../../../modules/bwa/mem/main.nf'
+
+include { SAMTOOLS_SORT } from '../../../modules/samtools/sort/main.nf'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_COV2   } from '../../../modules/samtools/sort/main.nf'
+
+include { BAMCMP } from '../../../modules/bamcmp/main.nf'
+
+// Test workflow: aligns one human FASTQ against both the human and the
+// SARS-CoV-2 reference, sorts each alignment, and feeds the paired BAMs to BAMCMP.
+workflow test_bamcmp {
+
+    // [ meta map, [ reads ] ]
+    reads = [
+        [ id:'test'],
+        [ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+    ]
+
+    primary_fasta     = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    contaminant_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    // Alignment against the primary (human) genome
+    BWA_INDEX ( primary_fasta )
+    BWA_MEM ( reads, BWA_INDEX.out.index, false )
+    SAMTOOLS_SORT ( BWA_MEM.out.bam )
+
+    // Alignment of the same reads against the contaminant (SARS-CoV-2) genome
+    BWA_INDEX_COV2 ( contaminant_fasta )
+    BWA_MEM_COV2 ( reads, BWA_INDEX_COV2.out.index, false )
+    SAMTOOLS_SORT_COV2 ( BWA_MEM_COV2.out.bam )
+
+    // Pair the two sorted BAMs on the meta map (tuple element 0)
+    primary_bam     = SAMTOOLS_SORT.out.bam
+    contaminant_bam = SAMTOOLS_SORT_COV2.out.bam
+    BAMCMP ( primary_bam.join(contaminant_bam, by: [0]) )
+
+}
--- a/tests/modules/bamcmp/nextflow.config
+++ b/tests/modules/bamcmp/nextflow.config
@ -0,0 +1,27 @@
+// Per-process settings for the bamcmp module test.
+process {
+
+    // Publish under <outdir>/<tool>, where <tool> is derived from the process
+    // name: take the last ':'-separated component, keep the part before the
+    // first '_', and lowercase it (e.g. BWA_MEM_COV2 -> bwa, BAMCMP -> bamcmp).
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    // Distinct prefixes for the two alignments of the same sample.
+    withName: BWA_MEM {
+        ext.prefix = "human"
+    }
+
+    withName: BWA_MEM_COV2 {
+        ext.prefix = "cov2"
+    }
+
+    // '-n' is the samtools sort option for ordering by read name.
+    withName: SAMTOOLS_SORT {
+        ext.args = '-n'
+        ext.prefix = "sorted"
+    }
+
+    withName: SAMTOOLS_SORT_COV2 {
+        ext.args = '-n'
+        ext.prefix = "sorted_cov2"
+    }
+    
+    // '-s "as"' is the scoring option the module's meta.yml recommends over '-s mapq'.
+    withName: BAMCMP {
+        ext.args = '-s "as"'
+    }
+
+}
--- a/tests/modules/bamcmp/test.yml
+++ b/tests/modules/bamcmp/test.yml
@ -0,0 +1,11 @@
+- name: bamcmp test_bamcmp
+  command: nextflow run tests/modules/bamcmp -entry test_bamcmp -c tests/config/nextflow.config
+  tags:
+    - bamcmp
+  files:
+    - path: output/bamcmp/test_contamination.bam
+      md5sum: 1fe730936d489c637479c1e51dd8ca55
+    - path: output/bamcmp/test_primary.bam
+      md5sum: 80b9abd8ef83e63548a9b8b82be2a034
+    - path: output/bamcmp/versions.yml
+      md5sum: 34d569665ff0459e84114e966dd3483b