mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-02 20:52:07 -05:00
Merge pull request #34 from luslab/feat-mod-umitools
Added Umi-tools module covering the dedup function
This commit is contained in:
commit
cb89722ea8
19 changed files with 213 additions and 0 deletions
8
tools/umi_tools/Dockerfile
Normal file
8
tools/umi_tools/Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# Image for the nf-core umi_tools module: the nfcore/base image plus the conda
# environment described by environment.yml.
FROM nfcore/base:1.7
LABEL authors="chris.cheshire@crick.ac.uk" \
      description="Docker image containing all requirements for the nf-core umi_tools module"

# Install conda packages
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a

# Put the environment's executables first on PATH. The directory name must
# match the `name:` declared in environment.yml. The key=value form is used
# because the space-separated `ENV key value` form is a discouraged legacy syntax.
ENV PATH=/opt/conda/envs/nfcore-module-umitools/bin:$PATH
|
10
tools/umi_tools/environment.yml
Normal file
10
tools/umi_tools/environment.yml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Conda environment definition for the umi_tools module.
# Build it with:
#   conda env create -f environment.yml
name: nfcore-module-umitools

# Channels are listed in the order they appear to conda.
channels:
  - conda-forge
  - bioconda
  - defaults

# Pinned tool versions used by the module.
dependencies:
  - umi_tools=1.0.1
  - samtools=1.1.0
|
45
tools/umi_tools/main.nf
Normal file
45
tools/umi_tools/main.nf
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/env nextflow

// Specify DSL2
nextflow.preview.dsl = 2

/*
 * umitools_dedup
 *
 * Runs `umi_tools dedup` on one sample and indexes the result with samtools.
 *
 * Input:
 *   tuple val(sample_id), path(bam)
 *     sample_id - identifier used to name every output file
 *     bam       - staged file(s); only the first element (bam[0]) is passed
 *                 to `umi_tools dedup -I`
 *
 * Output:
 *   dedupBam - tuple (sample_id, <sample_id>.dedup.bam)
 *   dedupBai - tuple (sample_id, <sample_id>.dedup.bam.bai)
 *   report   - the *.dedup.log file written via --log
 *
 * Params read:
 *   params.outdir               - base directory for published results
 *   params.umitools_dedup_args  - optional extra arguments for `umi_tools dedup`
 *   params.verbose              - when true, print the assembled command
 */
process umitools_dedup {
    publishDir "${params.outdir}/umitools/dedup",
        mode: "copy", overwrite: true

    container 'luslab/nf-modules-umitools:latest'

    input:
      tuple val(sample_id), path(bam)

    output:
      tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam
      tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai
      path "*.dedup.log", emit: report

    script:

    // Every variable below is declared with `def` so that it is local to the
    // task. Without `def` the names are shared by all tasks of the run, and
    // tasks executing in parallel can overwrite each other's values.

    // Init
    def args = "--log=${sample_id}.dedup.log"

    // Check main args string exists and strip whitespace
    if (params.umitools_dedup_args) {
        def ext_args = params.umitools_dedup_args
        args += " " + ext_args.trim()
    }

    // Construct CL line
    def dedup_command = "umi_tools dedup ${args} -I ${bam[0]} -S ${sample_id}.dedup.bam --output-stats=${sample_id}"

    // Log
    if (params.verbose) {
        println ("[MODULE] umi_tools/dedup command: " + dedup_command)
    }

    //SHELL
    """
    ${dedup_command}
    samtools index ${sample_id}.dedup.bam
    """
}
|
BIN
tools/umi_tools/test/input/sample1.bai
Executable file
BIN
tools/umi_tools/test/input/sample1.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample1.bam
Executable file
BIN
tools/umi_tools/test/input/sample1.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample2.bai
Executable file
BIN
tools/umi_tools/test/input/sample2.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample2.bam
Executable file
BIN
tools/umi_tools/test/input/sample2.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample3.bai
Executable file
BIN
tools/umi_tools/test/input/sample3.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample3.bam
Executable file
BIN
tools/umi_tools/test/input/sample3.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample4.bai
Executable file
BIN
tools/umi_tools/test/input/sample4.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample4.bam
Executable file
BIN
tools/umi_tools/test/input/sample4.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample5.bai
Executable file
BIN
tools/umi_tools/test/input/sample5.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample5.bam
Executable file
BIN
tools/umi_tools/test/input/sample5.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample6.bai
Executable file
BIN
tools/umi_tools/test/input/sample6.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample6.bam
Executable file
BIN
tools/umi_tools/test/input/sample6.bam
Executable file
Binary file not shown.
62
tools/umi_tools/test/main.nf
Normal file
62
tools/umi_tools/test/main.nf
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
#!/usr/bin/env nextflow

// Test workflow for the umitools_dedup module: runs the process on the six
// bundled samples and then compares the published BAM files with the expected
// ones using verify-checksum.sh.

// Define DSL2
nextflow.preview.dsl=2

// Log
log.info ("Starting tests for umi_tools dedup...")

/*------------------------------------------------------------------------------------*/
/* Define params
--------------------------------------------------------------------------------------*/

params.umitools_dedup_args = '--umi-separator=":"'
params.verbose = false

/*------------------------------------------------------------------------------------*/
/* Module inclusions
--------------------------------------------------------------------------------------*/

include umitools_dedup from '../main.nf'

/*------------------------------------------------------------------------------------*/
/* Define input channels
--------------------------------------------------------------------------------------*/

// Define test data: [sample id, BAM path, BAI path]
testData = [
    ['sample1', "$baseDir/input/sample1.bam", "$baseDir/input/sample1.bai"],
    ['sample2', "$baseDir/input/sample2.bam", "$baseDir/input/sample2.bai"],
    ['sample3', "$baseDir/input/sample3.bam", "$baseDir/input/sample3.bai"],
    ['sample4', "$baseDir/input/sample4.bam", "$baseDir/input/sample4.bai"],
    ['sample5', "$baseDir/input/sample5.bam", "$baseDir/input/sample5.bai"],
    ['sample6', "$baseDir/input/sample6.bam", "$baseDir/input/sample6.bai"]
]

//Define test data input channel: each item is [sample id, [bam, bai]]
Channel
    .from(testData)
    .map { row -> [ row[0], [file(row[1], checkIfExists: true), file(row[2], checkIfExists: true)]]}
    .set {ch_bam}

/*------------------------------------------------------------------------------------*/
/* Run tests
--------------------------------------------------------------------------------------*/

workflow {
    // Run dedup
    umitools_dedup ( ch_bam )
}

workflow.onComplete {
    // String.execute() does not go through a shell, so the two glob patterns
    // reach verify-checksum.sh unexpanded; the script expands them itself.
    def proc = "$baseDir/verify-checksum.sh $baseDir/../../../results/umitools/dedup/*.bam $baseDir/output/*.bam".execute()
    def b = new StringBuffer()

    // stderr is drained on a background thread while stdout is read below
    def errThread = proc.consumeProcessErrorStream(b)

    log.info proc.text

    // Wait for the stderr reader to finish so the buffer is complete
    errThread.join()

    def errorString = b.toString()

    // Fail only when the verification script reported something on stderr.
    // The braces are required: without them only `log.error` is conditional
    // and `exit 1` runs on every execution.
    if (errorString != '') {
        log.error errorString
        exit 1
    }
}
|
2
tools/umi_tools/test/nextflow.config
Normal file
2
tools/umi_tools/test/nextflow.config
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
// Configuration for the umi_tools module test run.

// Directory under which the module publishes its results
params {
    outdir = './results'
}

// Run processes inside their declared containers
docker {
    enabled = true
}
|
40
tools/umi_tools/test/verify-checksum.sh
Executable file
40
tools/umi_tools/test/verify-checksum.sh
Executable file
|
@ -0,0 +1,40 @@
|
||||||
|
#!/bin/sh
#
# Usage: verify-checksum.sh <check-pattern> <verify-pattern>
#
# Hashes every file matched by each pattern with md5sum, then hashes each
# list of hashes and compares the two results.
#
# Exit status:
#   0  the two combined hashes are equal ("Hashes match" on stdout)
#   1  an argument is missing, a file set could not be hashed, or the
#      combined hashes differ (reason on stderr)

if [ -z "$1" ]
then
    echo "No check pattern argument supplied" >&2
    exit 1
fi

if [ -z "$2" ]
then
    echo "No verify pattern argument supplied" >&2
    exit 1
fi

checkfiles=$1
infiles=$2

# NOTE: $checkfiles and $infiles are intentionally left unquoted in the md5sum
# calls so that a glob pattern received as a single argument is expanded here.

# printf is used for the headings because the handling of '\n' by echo differs
# between /bin/sh implementations.
printf '\nCalculating check file hashes...\n'
if ! checksums=$(md5sum $checkfiles)
then
    # Without this guard two failed listings would both be empty and compare equal.
    echo "Could not hash check files: $checkfiles" >&2
    exit 1
fi
printf '%s\n' "$checksums"

printf '\nCalculating input file hashes...\n'
if ! insums=$(md5sum $infiles)
then
    echo "Could not hash input files: $infiles" >&2
    exit 1
fi
printf '%s\n' "$insums"

printf '\nComparing hash of file of hashes...\n'
# Keep only the hash column, then hash the resulting list
checkver=$(printf '%s\n' "$checksums" | awk '{print $1}' | md5sum | awk '{print $1}')
printf '%s\n' "$checkver"

inver=$(printf '%s\n' "$insums" | awk '{print $1}' | md5sum | awk '{print $1}')
printf '%s\n' "$inver"

# POSIX test uses a single '=' for string equality; '==' is not portable to
# every /bin/sh.
if [ "$checkver" = "$inver" ]
then
    echo "Hashes match"
    exit 0
else
    echo "Hashes do not match" >&2
    exit 1
fi
|
46
tools/umi_tools/umi_tools.yml
Normal file
46
tools/umi_tools/umi_tools.yml
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# Metadata for the umi_tools module: the wrapped tool, the processes the
# module provides, and their inputs and outputs.
name: umi_tools
# Quoted so the value stays the string "1.0" instead of becoming a float.
version: '1.0'
description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
keywords:
  - UMI
  - RMT
  - Barcode
tools:
  - umi_tools:
      description: |
        Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
      homepage: https://github.com/CGATOxford/UMI-tools
      documentation: https://umi-tools.readthedocs.io/en/latest/
processes:
  - dedup:
      operation: |
        Set command args to params.umitools_dedup_args
        The program will execute with the following pattern:
        umi_tools dedup --log={SAMPLE_ID}.dedup.log {params.umitools_dedup_args} -I {SAMPLE_ID}.bam -S {SAMPLE_ID}.dedup.bam --output-stats={SAMPLE_ID}
      description: |
        Groups PCR duplicates and de-duplicates reads to yield one read per group.
        Use this when you want to remove the PCR duplicates prior to any downstream analysis.
      input:
        - sample_id:
            type: string
            description: Sample identifier
        - bam:
            type: file array
            description: BAM sequence file and associated BAI index file
      # Output names match the `emit:` names of the umitools_dedup process.
      # Patterns are quoted because a plain scalar starting with `*` is read
      # as a YAML alias.
      output:
        - dedupBam:
            type: tuple
            description: A tuple of samples id and output bam file
            pattern: '[sample_id, *SAMPLE_ID.dedup.bam]'
        - dedupBai:
            type: tuple
            description: A tuple of samples id and output bai file
            pattern: '[sample_id, *SAMPLE_ID.dedup.bam.bai]'
        - report:
            type: file
            description: Log file for the umi_tools operation
            pattern: '*SAMPLE_ID.dedup.log'
# Handles are quoted because `@` is a reserved indicator and cannot start a
# plain scalar.
authors:
  - '@candiceh08'
  - '@chris-cheshire'
|
||||||
|
|
Loading…
Reference in a new issue