* New module added bamcmp

Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>
Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
This commit is contained in:
Katarzyna Kamieniecka 2022-02-02 16:38:41 +00:00 committed by GitHub
parent 601c6e2023
commit f112e4d701
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 170 additions and 0 deletions

35
modules/bamcmp/main.nf Normal file
View file

@ -0,0 +1,35 @@
// Splits the reads of one sample into "primary" and "contamination" BAM files by
// running bamcmp on two alignments of the same reads (one per genome).
process BAMCMP {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::bamcmp=2.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bamcmp:2.2--h05f6578_0' :
        'quay.io/biocontainers/bamcmp:2.2--h05f6578_0' }"

    input:
    // meta        : sample information map; meta.id is the default output prefix
    // sample      : alignment handed to bamcmp as `-1`
    // contaminant : alignment handed to bamcmp as `-2`
    tuple val(meta), path(sample), path(contaminant)

    output:
    tuple val(meta), path("*primary.bam")      , emit: bam
    tuple val(meta), path("*contamination.bam"), emit: contamination_bam
    path "versions.yml"                        , emit: versions

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version string is hard-coded rather than queried from the tool
    // (presumably bamcmp has no version flag on its CLI - confirm).
    // Keep it in sync with the conda and container versions above.
    // It is declared here, not at module level, so the module file contains
    // only definitions and no free-standing top-level statements.
    def VERSION = '2.2'
    """
    bamcmp \\
        -1 $sample \\
        -2 $contaminant \\
        -A ${prefix}_primary.bam \\
        -B ${prefix}_contamination.bam \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bamcmp: $VERSION
    END_VERSIONS
    """
}

57
modules/bamcmp/meta.yml Normal file
View file

@ -0,0 +1,57 @@
name: bamcmp
description: Bamcmp (Bam Compare) is a tool for assigning reads between a primary genome and a contamination genome. For instance, filtering out mouse reads from patient derived xenograft mouse models (PDX).
keywords:
  - filter
  - xenograft
  - host
  - graft
  - contamination
  - mouse
tools:
  - bamcmp:
      description: Bamcmp is a tool for deconvolving host and graft reads, using two bam files. Reads should be mapped to two genomes, and the mapped,
        sorted bam files supplied to the tool. It is highly recommended to use the "-s as" option not the "-s mapq" option, else
        reads which multimap to the contamination genome will be spuriously kept.
      homepage: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      documentation: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      tool_dev_url: https://github.com/CRUKMI-ComputationalBiology/bamcmp
      doi: "10.1158/1541-7786.MCR-16-0431"
      licence: ['GPL v3']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - sample:
      type: file
      description: BAM/CRAM/SAM file with the reads aligned to the primary genome (the one you want to keep)
      pattern: "*.{bam,cram,sam}"
  - contaminant:
      type: file
      description: BAM/CRAM/SAM file with the reads aligned to the contaminant genome (the one you want to filter out)
      pattern: "*.{bam,cram,sam}"

# Output entries are named after the `emit:` channels of the BAMCMP process,
# and the file patterns mirror the globs the process collects.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - bam:
      type: file
      description: BAM file containing the reads which align better to the primary genome.
      pattern: "*primary.bam"
  - contamination_bam:
      type: file
      description: BAM file containing the reads which align better to the contaminant genome.
      pattern: "*contamination.bam"

authors:
  - "@kmurat1"
  - "@sppearce"

View file

@ -46,6 +46,10 @@ bamaligncleaner:
- modules/bamaligncleaner/**
- tests/modules/bamaligncleaner/**
bamcmp:
- modules/bamcmp/**
- tests/modules/bamcmp/**
bamtools/convert:
- modules/bamtools/convert/**
- tests/modules/bamtools/convert/**

View file

@ -0,0 +1,36 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BWA_INDEX } from '../../../modules/bwa/index/main.nf'
include { BWA_MEM } from '../../../modules/bwa/mem/main.nf'
include { BWA_INDEX as BWA_INDEX_COV2 } from '../../../modules/bwa/index/main.nf'
include { BWA_MEM as BWA_MEM_COV2 } from '../../../modules/bwa/mem/main.nf'
include { SAMTOOLS_SORT } from '../../../modules/samtools/sort/main.nf'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_COV2 } from '../../../modules/samtools/sort/main.nf'
include { BAMCMP } from '../../../modules/bamcmp/main.nf'
// Test workflow for BAMCMP: the same single-end reads are aligned to the
// homo_sapiens and sarscov2 test genomes, both alignments are sorted, and the
// two sorted BAMs are joined on their meta map before being handed to BAMCMP.
workflow test_bamcmp {

    reads = [
        [ id:'test'], // meta map
        [ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    ]

    human_fasta    = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    sarscov2_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Build one BWA index per reference genome.
    BWA_INDEX      ( human_fasta )
    BWA_INDEX_COV2 ( sarscov2_fasta )

    // Align the same reads against each index.
    BWA_MEM      ( reads, BWA_INDEX.out.index, false )
    BWA_MEM_COV2 ( reads, BWA_INDEX_COV2.out.index, false )

    // Sort each alignment (sort options come from the test nextflow.config).
    SAMTOOLS_SORT      ( BWA_MEM.out.bam )
    SAMTOOLS_SORT_COV2 ( BWA_MEM_COV2.out.bam )

    // Pair the two BAMs per sample: [ meta, primary_bam, contaminant_bam ].
    paired_bams = SAMTOOLS_SORT.out.bam.join(SAMTOOLS_SORT_COV2.out.bam, by: [0])

    BAMCMP ( paired_bams )
}

View file

@ -0,0 +1,27 @@
// Per-process settings for the bamcmp module test workflow.
process {
// Publish each process's outputs under <params.outdir>/<name>, where <name> is the
// last ':'-separated component of the process name, cut at the first '_' and lowercased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Distinct prefixes for the two BWA_MEM aliases (one per reference genome).
withName: BWA_MEM {
ext.prefix = "human"
}
withName: BWA_MEM_COV2 {
ext.prefix = "cov2"
}
// Both sort steps pass '-n' to samtools sort
// (presumably because bamcmp expects name-sorted input - confirm).
withName: SAMTOOLS_SORT {
ext.args = '-n'
ext.prefix = "sorted"
}
withName: SAMTOOLS_SORT_COV2 {
ext.args = '-n'
ext.prefix = "sorted_cov2"
}
// Use the '-s as' option, which the module's meta.yml recommends over '-s mapq'.
withName: BAMCMP {
ext.args = '-s "as"'
}
}

View file

@ -0,0 +1,11 @@
# pytest-workflow spec for the bamcmp module: runs the test_bamcmp entry workflow
# and checks the checksums of the files published under output/bamcmp.
- name: bamcmp test_bamcmp
command: nextflow run tests/modules/bamcmp -entry test_bamcmp -c tests/config/nextflow.config
tags:
- bamcmp
files:
# File names start with "test", the meta id used by the test workflow.
- path: output/bamcmp/test_contamination.bam
md5sum: 1fe730936d489c637479c1e51dd8ca55
- path: output/bamcmp/test_primary.bam
md5sum: 80b9abd8ef83e63548a9b8b82be2a034
- path: output/bamcmp/versions.yml
md5sum: 34d569665ff0459e84114e966dd3483b