bamcmp (#1244)

* New module added bamcmp Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se> Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
2024-12-22 02:58:17 +00:00 · 2022-02-02 16:38:41 +00:00 · 2022-02-02 16:38:41 +00:00 · f112e4d701
commit f112e4d701
parent 601c6e2023
6 changed files with 170 additions and 0 deletions
--- a/modules/bamcmp/main.nf
+++ b/modules/bamcmp/main.nf
@ -0,0 +1,35 @@
 // Runs bamcmp on two alignments of the same reads and splits them into a
 // "primary" BAM and a "contamination" BAM.
 //
 // Inputs (one tuple per sample):
 //   meta        - sample map; meta.id is the default output prefix
 //   sample      - first alignment, passed to bamcmp as -1
 //   contaminant - second alignment, passed to bamcmp as -2
 //
 // Emits:
 //   bam               - [ meta, <prefix>_primary.bam ]
 //   contamination_bam - [ meta, <prefix>_contamination.bam ]
 //   versions          - versions.yml recording the bamcmp version
 process BAMCMP {
    label 'process_low'
    conda (params.enable_conda ? "bioconda::bamcmp=2.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bamcmp:2.2--h05f6578_0' :
        'quay.io/biocontainers/bamcmp:2.2--h05f6578_0' }"
    input:
    tuple val(meta), path(sample), path(contaminant)
    output:
    tuple val(meta), path("*primary.bam")      , emit: bam
    tuple val(meta), path("*contamination.bam"), emit: contamination_bam
    path "versions.yml"                        , emit: versions
    script:
    // Extra command-line options and the output prefix are configurable via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded here instead of being read from the tool; keep it
    // in sync with the conda and container pins above. It is declared inside the
    // script block (not at the top level of the file) so the module file contains
    // only declarations.
    def VERSION = '2.2'
    """
    bamcmp \\
        -1 $sample \\
        -2 $contaminant \\
        -A ${prefix}_primary.bam \\
        -B ${prefix}_contamination.bam \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bamcmp: $VERSION
    END_VERSIONS
    """
 }
--- a/modules/bamcmp/meta.yml
+++ b/modules/bamcmp/meta.yml
@ -0,0 +1,57 @@
 # Metadata describing the bamcmp module defined in modules/bamcmp/main.nf.
 name: bamcmp
 description: Bamcmp (Bam Compare) is a tool for assigning reads between a primary genome and a contamination genome. For instance, filtering out mouse reads from patient derived xenograft mouse models (PDX).
 # Free-text keywords associated with the module.
 keywords:
  - filter
  - xenograft
  - host
  - graft
  - contamination
  - mouse
 # Upstream tool wrapped by the module. Note the recommendation below to use
 # "-s as"; the module's test configuration passes that option through ext.args.
 tools:
  - bamcmp:
      description: Bamcmp is a tool for deconvolving host and graft reads, using two bam files. Reads should be mapped to two genomes, and the mapped,
                    sorted bam files supplied to the tool. It is highly recommended to use the "-s as" option not the "-s mapq" option, else
                    reads which multimap to the contamination genome will be spuriously kept.
      homepage: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      documentation: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      tool_dev_url: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      doi: "10.1158/1541-7786.MCR-16-0431"
      licence: ['GPL v3']
 # Inputs, in the order of the process declaration
 # `tuple val(meta), path(sample), path(contaminant)` in modules/bamcmp/main.nf.
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  # Passed to bamcmp as `-1`.
  - sample:
      type: file
      description: BAM/CRAM/SAM file with the reads aligned to the primary genome (the one you want to keep)
      pattern: "*.{bam,cram,sam}"
  # Passed to bamcmp as `-2`.
  - contaminant:
      type: file
      description: BAM/CRAM/SAM file with the reads aligned to the contaminant genome (the one you want to filter out)
      pattern: "*.{bam,cram,sam}"
 # Outputs. Each entry name matches an `emit:` label (or the shared `meta` map)
 # of the BAMCMP process in modules/bamcmp/main.nf, and each pattern matches the
 # corresponding output glob declared there.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - bam:
      type: file
      description: Bam file containing the reads which align better to the primary genome.
      pattern: "*primary.bam"
  - contamination_bam:
      type: file
      description: Bam file containing the reads which align better to the contaminant genome.
      pattern: "*contamination.bam"
 authors:
  - "@kmurat1"
  - "@sppearce"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -46,6 +46,10 @@ bamaligncleaner:
  - modules/bamaligncleaner/**
  - tests/modules/bamaligncleaner/**
 # Paths belonging to the bamcmp module: its implementation and its tests.
 # NOTE(review): presumably used to decide which module tests to run when
 # files change — confirm against the CI setup.
 bamcmp:
  - modules/bamcmp/**
  - tests/modules/bamcmp/**
 bamtools/convert:
  - modules/bamtools/convert/**
  - tests/modules/bamtools/convert/**
--- a/tests/modules/bamcmp/main.nf
+++ b/tests/modules/bamcmp/main.nf
@ -0,0 +1,36 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl = 2
 include { BWA_INDEX } from '../../../modules/bwa/index/main.nf'
 include { BWA_MEM   } from '../../../modules/bwa/mem/main.nf'
 include { BWA_INDEX as BWA_INDEX_COV2 } from '../../../modules/bwa/index/main.nf'
 include { BWA_MEM as BWA_MEM_COV2   } from '../../../modules/bwa/mem/main.nf'
 include { SAMTOOLS_SORT } from '../../../modules/samtools/sort/main.nf'
 include { SAMTOOLS_SORT as SAMTOOLS_SORT_COV2   } from '../../../modules/samtools/sort/main.nf'
 include { BAMCMP } from '../../../modules/bamcmp/main.nf'
 // End-to-end test for BAMCMP: the same human FASTQ is aligned to the human
 // genome and to the SARS-CoV-2 genome, both alignments are sorted, and the two
 // resulting BAMs are handed to BAMCMP together.
 workflow test_bamcmp {
    reads = [
        [ id:'test'], // meta map
        [ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    ]
    primary_fasta     = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    contaminant_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Primary (human) branch: index -> align -> sort.
    BWA_INDEX ( primary_fasta )
    BWA_MEM ( reads, BWA_INDEX.out.index, false )
    SAMTOOLS_SORT ( BWA_MEM.out.bam )

    // Contaminant (SARS-CoV-2) branch: same steps via the aliased processes.
    BWA_INDEX_COV2 ( contaminant_fasta )
    BWA_MEM_COV2 ( reads, BWA_INDEX_COV2.out.index, false )
    SAMTOOLS_SORT_COV2 ( BWA_MEM_COV2.out.bam )

    // Join the two sorted outputs on their first element so that BAMCMP
    // receives one tuple carrying both BAMs.
    // NOTE(review): assumes each sort emits [ meta, bam ] — confirm in samtools/sort.
    paired_bams = SAMTOOLS_SORT.out.bam.join(SAMTOOLS_SORT_COV2.out.bam, by: [0])
    BAMCMP ( paired_bams )
 }
--- a/tests/modules/bamcmp/nextflow.config
+++ b/tests/modules/bamcmp/nextflow.config
@ -0,0 +1,27 @@
 // Per-process settings for the bamcmp module test.
 process {
    // Publish results under <outdir>/<dir>, where <dir> is the lower-cased text
    // before the first underscore of the last ':'-separated part of the
    // process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // Distinct prefixes for the two alignment runs.
    withName: 'BWA_MEM' {
        ext.prefix = 'human'
    }
    withName: 'BWA_MEM_COV2' {
        ext.prefix = 'cov2'
    }

    // Both sorts are run with '-n' and get their own prefixes.
    withName: 'SAMTOOLS_SORT' {
        ext.prefix = 'sorted'
        ext.args = '-n'
    }
    withName: 'SAMTOOLS_SORT_COV2' {
        ext.prefix = 'sorted_cov2'
        ext.args = '-n'
    }

    // Extra option forwarded to the bamcmp command line.
    withName: 'BAMCMP' {
        ext.args = '-s "as"'
    }
 }
--- a/tests/modules/bamcmp/test.yml
+++ b/tests/modules/bamcmp/test.yml
@ -0,0 +1,11 @@
 # Runs the test_bamcmp entry workflow and checks the published files by checksum.
 # The "test" prefix in the BAM names comes from the meta id used by the test
 # workflow, and the "bamcmp" directory from the publishDir rule in
 # tests/modules/bamcmp/nextflow.config.
 # NOTE(review): assumes params.outdir resolves to "output" — confirm in tests/config/nextflow.config.
 - name: bamcmp test_bamcmp
  command: nextflow run tests/modules/bamcmp -entry test_bamcmp -c tests/config/nextflow.config
  tags:
    - bamcmp
  files:
    - path: output/bamcmp/test_contamination.bam
      md5sum: 1fe730936d489c637479c1e51dd8ca55
    - path: output/bamcmp/test_primary.bam
      md5sum: 80b9abd8ef83e63548a9b8b82be2a034
    - path: output/bamcmp/versions.yml
      md5sum: 34d569665ff0459e84114e966dd3483b