Merge pull request #34 from luslab/feat-mod-umitools

Added Umi-tools module covering the dedup function
This commit is contained in:
Phil Ewels 2020-07-11 13:31:40 +02:00 committed by GitHub
commit cb89722ea8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 213 additions and 0 deletions

View file

@ -0,0 +1,8 @@
# Image for the nf-core umi_tools module, built on the nf-core base image.
FROM nfcore/base:1.7

LABEL authors="chris.cheshire@crick.ac.uk" \
      description="Docker image containing all requirements for the nf-core umi_tools module"

# Create the conda environment described in environment.yml, then clear conda's caches.
# The `conda` executable is expected to come from the base image.
COPY environment.yml /
RUN conda env create -f /environment.yml \
    && conda clean -a

# Put the environment's executables ahead of everything else on PATH.
ENV PATH=/opt/conda/envs/nfcore-module-umitools/bin:$PATH

View file

@ -0,0 +1,10 @@
# This file creates a conda environment for the umi_tools module
# conda env create -f environment.yml
# The environment name below must match the path the Dockerfile adds to PATH
# (/opt/conda/envs/nfcore-module-umitools/bin).
name: nfcore-module-umitools
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# umi_tools provides the `umi_tools dedup` command run by the module
- umi_tools=1.0.1
# samtools provides `samtools index`, run on the de-duplicated BAM
# NOTE(review): "1.1.0" looks like it may have been intended as "1.10" — confirm the pinned version
- samtools=1.1.0

45
tools/umi_tools/main.nf Normal file
View file

@ -0,0 +1,45 @@
#!/usr/bin/env nextflow
// Specify DSL2
nextflow.preview.dsl = 2
// Process definition
/*
 * umitools_dedup
 *
 * Runs `umi_tools dedup` on a BAM file and then indexes the de-duplicated BAM
 * with `samtools index`.
 *
 * Input:
 *   tuple val(sample_id), path(bam)
 *     sample_id - sample identifier, used as the prefix of every output file
 *     bam       - staged input file(s); the first element (bam[0]) is passed
 *                 to umi_tools as the input BAM
 *
 * Output:
 *   dedupBam - tuple [sample_id, <sample_id>.dedup.bam]
 *   dedupBai - tuple [sample_id, <sample_id>.dedup.bam.bai]
 *   report   - the *.dedup.log file written via --log
 *
 * Params read:
 *   params.outdir              - base directory for published results
 *   params.umitools_dedup_args - optional extra arguments appended to the command
 *   params.verbose             - when true, print the assembled command
 */
process umitools_dedup {
    publishDir "${params.outdir}/umitools/dedup",
        mode: "copy", overwrite: true

    container 'luslab/nf-modules-umitools:latest'

    input:
    tuple val(sample_id), path(bam)

    output:
    tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam
    tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai
    path "*.dedup.log", emit: report

    script:
    // Declare with `def` so each task gets its own copy. Without it these are
    // script-level variables shared by all tasks, and tasks running in parallel
    // can overwrite each other's command line.
    def args = "--log=${sample_id}.dedup.log"

    // Append the user-supplied arguments, if any, with surrounding whitespace stripped
    if (params.umitools_dedup_args) {
        args += " " + params.umitools_dedup_args.trim()
    }

    // Construct the command line
    // NOTE(review): files written by --output-stats are not declared as outputs,
    // so they are not published — confirm whether that is intended.
    def dedup_command = "umi_tools dedup ${args} -I ${bam[0]} -S ${sample_id}.dedup.bam --output-stats=${sample_id}"

    // Log
    if (params.verbose) {
        println ("[MODULE] umi_tools/dedup command: " + dedup_command)
    }

    // Shell script executed for the task
    """
    ${dedup_command}
    samtools index ${sample_id}.dedup.bam
    """
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,62 @@
#!/usr/bin/env nextflow
// Define DSL2
nextflow.preview.dsl=2
// Announce the start of the umi_tools dedup module tests in the Nextflow log
log.info ("Starting tests for umi_tools dedup...")
/*------------------------------------------------------------------------------------*/
/* Define params
--------------------------------------------------------------------------------------*/
// Extra arguments the module appends to the `umi_tools dedup` command line
params.umitools_dedup_args = '--umi-separator=":"'
// When true, the module prints the assembled umi_tools command
params.verbose = false
/*------------------------------------------------------------------------------------*/
/* Module inclusions
--------------------------------------------------------------------------------------*/
include umitools_dedup from '../main.nf'
/*------------------------------------------------------------------------------------*/
/* Define input channels
--------------------------------------------------------------------------------------*/
// Test fixtures: one [sample id, BAM path, BAI path] row per sample
testData = ['sample1', 'sample2', 'sample3', 'sample4', 'sample5', 'sample6'].collect { name ->
    [name, "$baseDir/input/${name}.bam", "$baseDir/input/${name}.bai"]
}

// Input channel emitting [sample_id, [bam, bai]]; fails early if either file is missing
Channel
    .from(testData)
    .map { row ->
        def staged = row[1..2].collect { file(it, checkIfExists: true) }
        [row[0], staged]
    }
    .set { ch_bam }
/*------------------------------------------------------------------------------------*/
/* Run tests
--------------------------------------------------------------------------------------*/
// Entry workflow: feed every [sample_id, [bam, bai]] item from ch_bam to the module
workflow {
// Run dedup
umitools_dedup ( ch_bam )
}
/*
 * Completion handler: runs verify-checksum.sh to compare the published
 * de-duplicated BAMs with the expected BAMs under output/, logs the script's
 * stdout, and exits with status 1 only when the verification reports a failure.
 */
workflow.onComplete {
    // The glob patterns are passed through literally (String.execute() does not
    // use a shell); verify-checksum.sh expands them itself.
    def command = "$baseDir/verify-checksum.sh $baseDir/../../../results/umitools/dedup/*.bam $baseDir/output/*.bam"
    def proc = command.execute()

    // Drain stdout and stderr and wait for the process to exit, so neither
    // buffer is read before it has been completely filled.
    def stdout = new StringBuffer()
    def stderr = new StringBuffer()
    proc.waitForProcessOutput(stdout, stderr)

    log.info stdout.toString()

    def errorString = stderr.toString()
    // Fail only on an actual problem: output on stderr or a non-zero exit status.
    // Previously `exit 1` sat outside the brace-less `if` and ran on every completion.
    if (errorString != '' || proc.exitValue() != 0) {
        log.error(errorString ?: "verify-checksum.sh exited with status ${proc.exitValue()}")
        exit 1
    }
}

View file

@ -0,0 +1,2 @@
// Base directory for published results; the module publishes to ${params.outdir}/umitools/dedup
params.outdir = './results'
// Enable Docker execution so the process runs in the container declared in main.nf
docker.enabled = true

View file

@ -0,0 +1,40 @@
#!/bin/sh
# Compare two sets of files by MD5 checksum.
#
# Usage: verify-checksum.sh <check-pattern> <verify-pattern>
#
# Each argument is a file name or glob pattern. The caller may pass the pattern
# unexpanded, so it is deliberately left unquoted below and expanded here.
# The md5sum of every matching file is computed, the list of hashes is hashed
# again, and the two resulting digests are compared.
#
# Exit status: 0 when the digests match; 1 on a missing argument, a pattern
# that matches no file, or a mismatch.
# Diagnostics go to stderr; progress output goes to stdout.

if [ -z "$1" ]
then
    echo "No check pattern argument supplied" >&2
    exit 1
fi

if [ -z "$2" ]
then
    echo "No verify pattern argument supplied" >&2
    exit 1
fi

checkfiles=$1
infiles=$2

# Guard against patterns that match nothing: otherwise both sides hash empty
# input and the comparison reports a false match.
# shellcheck disable=SC2086  # intentional glob expansion
if ! ls $checkfiles >/dev/null 2>&1
then
    echo "No files match check pattern: $checkfiles" >&2
    exit 1
fi
# shellcheck disable=SC2086  # intentional glob expansion
if ! ls $infiles >/dev/null 2>&1
then
    echo "No files match verify pattern: $infiles" >&2
    exit 1
fi

# printf is used instead of echo because handling of '\n' by echo differs
# between sh implementations.
printf '\nCalculating check file hashes...\n'
# shellcheck disable=SC2086
md5sum $checkfiles

printf '\nCalculating input file hashes...\n'
# shellcheck disable=SC2086
md5sum $infiles

printf '\nComparing hash of file of hashes...\n'
# shellcheck disable=SC2086
checkver=$(md5sum $checkfiles | awk '{print $1}' | md5sum | awk '{print $1}')
echo "$checkver"
# shellcheck disable=SC2086
inver=$(md5sum $infiles | awk '{print $1}' | md5sum | awk '{print $1}')
echo "$inver"

# POSIX test compares strings with a single '='; '==' is a bash extension that
# fails under shells such as dash and made every comparison report a mismatch.
if [ "$checkver" = "$inver" ]
then
    echo "Hashes match"
    exit 0
else
    echo "Hashes do not match" >&2
    exit 1
fi

View file

@ -0,0 +1,46 @@
# Module metadata for the umi_tools module.
# Scalars beginning with '*' or '@' are quoted: unquoted, YAML reads '*' as an
# alias reference and '@' as a reserved indicator, and the file fails to parse.
name: umi_tools
version: 1.0
description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
keywords:
  - UMI
  - RMT
  - Barcode
tools:
  - umi_tools:
      description: |
        Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
      homepage: https://github.com/CGATOxford/UMI-tools
      documentation: https://umi-tools.readthedocs.io/en/latest/
      processes:
        - dedup:
            operation: |
              Set command args to params.umitools_dedup_args
              The program will execute with the following pattern:
                umi_tools dedup --log={SAMPLE_ID}.dedup.log {params.umitools_dedup_args} -I {SAMPLE_ID}.bam -S {SAMPLE_ID}.dedup.bam --output-stats={SAMPLE_ID}
            description: |
              Groups PCR duplicates and de-duplicates reads to yield one read per group.
              Use this when you want to remove the PCR duplicates prior to any downstream analysis.
            input:
              - sample_id:
                  type: string
                  description: Sample identifier
              - bam:
                  type: file array
                  description: BAM sequence file and associated BAI index file
            output:
              - dedupBam:
                  type: tuple
                  description: A tuple of sample id and output bam file
                  pattern: "[sample_id, *SAMPLE_ID.dedup.bam]"
              # Named dedupBai to match the channel emitted by the process in main.nf
              - dedupBai:
                  type: tuple
                  description: A tuple of sample id and output bai file
                  pattern: "[sample_id, *SAMPLE_ID.dedup.bam.bai]"
              - report:
                  type: file
                  description: Log file for the umi_tools operation
                  pattern: "*SAMPLE_ID.dedup.log"
authors:
  - "@candiceh08"
  - "@chris-cheshire"