mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-13 05:13:09 +00:00
Merge pull request #34 from luslab/feat-mod-umitools
Added Umi-tools module covering the dedup function
This commit is contained in:
commit
cb89722ea8
19 changed files with 213 additions and 0 deletions
8
tools/umi_tools/Dockerfile
Normal file
8
tools/umi_tools/Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Image for the nf-core umi_tools module: nfcore/base plus the conda
# environment described by environment.yml.
FROM nfcore/base:1.7
LABEL authors="chris.cheshire@crick.ac.uk" \
      description="Docker image containing all requirements for the nf-core umi_tools module"

# Install conda packages
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a

# Put the environment's binaries first on PATH. The directory name must match
# the `name:` declared in environment.yml (nfcore-module-umitools).
# Uses the key=value form; the whitespace-separated `ENV key value` form is legacy.
ENV PATH="/opt/conda/envs/nfcore-module-umitools/bin:$PATH"
|
10
tools/umi_tools/environment.yml
Normal file
10
tools/umi_tools/environment.yml
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Conda environment definition for the umi_tools module.
# Build it with:
#   conda env create -f environment.yml
name: nfcore-module-umitools
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - umi_tools=1.0.1
  # NOTE(review): "1.1.0" may have been meant as samtools 1.10 - confirm the
  # intended release before rebuilding the image.
  - samtools=1.1.0
|
45
tools/umi_tools/main.nf
Normal file
45
tools/umi_tools/main.nf
Normal file
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
// Specify DSL2
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
// Process definition
//
// umitools_dedup: runs `umi_tools dedup` on one sample and indexes the result.
//
// Input : tuple of sample_id and `bam`; the first element of `bam` is used as
//         the BAM to de-duplicate (the test harness passes [bam, bai]).
// Output: dedupBam - [sample_id, <sample_id>.dedup.bam]
//         dedupBai - [sample_id, <sample_id>.dedup.bam.bai]
//         report   - the *.dedup.log written via --log
// Params: params.outdir              - root of the publish directory
//         params.umitools_dedup_args - optional extra umi_tools arguments
//         params.verbose             - print the assembled command when true
process umitools_dedup {
    publishDir "${params.outdir}/umitools/dedup",
        mode: "copy", overwrite: true

    container 'luslab/nf-modules-umitools:latest'

    input:
      tuple val(sample_id), path(bam)

    output:
      tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam
      tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai
      path "*.dedup.log", emit: report

    script:

    // Init: the log file name is fixed so that the `report` output always matches.
    // Locals are declared with `def` so they are explicitly scoped to this task.
    def args = "--log=${sample_id}.dedup.log"

    // Check main args string exists and strip whitespace
    if (params.umitools_dedup_args) {
        def ext_args = params.umitools_dedup_args
        args += " " + ext_args.trim()
    }

    // Construct CL line
    def dedup_command = "umi_tools dedup ${args} -I ${bam[0]} -S ${sample_id}.dedup.bam --output-stats=${sample_id}"

    // Log
    if (params.verbose) {
        println ("[MODULE] umi_tools/dedup command: " + dedup_command)
    }

    //SHELL
    """
    ${dedup_command}
    samtools index ${sample_id}.dedup.bam
    """
}
|
BIN
tools/umi_tools/test/input/sample1.bai
Executable file
BIN
tools/umi_tools/test/input/sample1.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample1.bam
Executable file
BIN
tools/umi_tools/test/input/sample1.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample2.bai
Executable file
BIN
tools/umi_tools/test/input/sample2.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample2.bam
Executable file
BIN
tools/umi_tools/test/input/sample2.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample3.bai
Executable file
BIN
tools/umi_tools/test/input/sample3.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample3.bam
Executable file
BIN
tools/umi_tools/test/input/sample3.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample4.bai
Executable file
BIN
tools/umi_tools/test/input/sample4.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample4.bam
Executable file
BIN
tools/umi_tools/test/input/sample4.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample5.bai
Executable file
BIN
tools/umi_tools/test/input/sample5.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample5.bam
Executable file
BIN
tools/umi_tools/test/input/sample5.bam
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample6.bai
Executable file
BIN
tools/umi_tools/test/input/sample6.bai
Executable file
Binary file not shown.
BIN
tools/umi_tools/test/input/sample6.bam
Executable file
BIN
tools/umi_tools/test/input/sample6.bam
Executable file
Binary file not shown.
62
tools/umi_tools/test/main.nf
Normal file
62
tools/umi_tools/test/main.nf
Normal file
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
// Define DSL2
|
||||
nextflow.preview.dsl=2
|
||||
|
||||
// Log
|
||||
log.info ("Starting tests for umi_tools dedup...")
|
||||
|
||||
/*------------------------------------------------------------------------------------*/
|
||||
/* Define params
|
||||
--------------------------------------------------------------------------------------*/
|
||||
|
||||
params.umitools_dedup_args = '--umi-separator=":"'
|
||||
params.verbose = false
|
||||
|
||||
/*------------------------------------------------------------------------------------*/
|
||||
/* Module inclusions
|
||||
--------------------------------------------------------------------------------------*/
|
||||
|
||||
include umitools_dedup from '../main.nf'
|
||||
|
||||
/*------------------------------------------------------------------------------------*/
|
||||
/* Define input channels
|
||||
--------------------------------------------------------------------------------------*/
|
||||
|
||||
// Test fixtures: sample1 .. sample6, each row is
// [sample id, path to the BAM, path to its BAI] under test/input.
testData = (1..6).collect { n ->
    def id = 'sample' + n
    [id, "$baseDir/input/${id}.bam", "$baseDir/input/${id}.bai"]
}

// Input channel: one [sample_id, [bam, bai]] item per fixture row.
// checkIfExists makes a missing fixture fail here instead of inside the process.
Channel
    .from(testData)
    .map { id, bamPath, baiPath ->
        def bamFile = file(bamPath, checkIfExists: true)
        def baiFile = file(baiPath, checkIfExists: true)
        [ id, [bamFile, baiFile] ]
    }
    .set { ch_bam }
|
||||
|
||||
/*------------------------------------------------------------------------------------*/
|
||||
/* Run tests
|
||||
--------------------------------------------------------------------------------------*/
|
||||
|
||||
workflow {
    // Feed every [sample_id, [bam, bai]] item from ch_bam through the dedup process
    umitools_dedup(ch_bam)
}
|
||||
|
||||
// After the run, compare the published BAMs with the reference BAMs in
// test/output by calling verify-checksum.sh, and fail the test only when the
// script reported something on stderr.
workflow.onComplete {
    // NOTE(review): the results path is built from $baseDir/../../.., which
    // presumably assumes the test is launched from the repository root with
    // params.outdir = './results' - confirm.
    def proc = "$baseDir/verify-checksum.sh $baseDir/../../../results/umitools/dedup/*.bam $baseDir/output/*.bam".execute()

    // Drain stdout and stderr together and wait for the script to finish, so
    // the stderr buffer is complete before it is inspected.
    def stdoutBuffer = new StringBuilder()
    def stderrBuffer = new StringBuilder()
    proc.waitForProcessOutput(stdoutBuffer, stderrBuffer)

    log.info stdoutBuffer.toString()

    def errorString = stderrBuffer.toString()
    // Braces are required: without them only log.error was conditional and
    // `exit 1` ran on every completion, including successful ones.
    if (errorString != '') {
        log.error errorString
        exit 1
    }
}
|
2
tools/umi_tools/test/nextflow.config
Normal file
2
tools/umi_tools/test/nextflow.config
Normal file
|
@ -0,0 +1,2 @@
|
|||
// Test configuration: publish into ./results and run processes in Docker.
params {
    outdir = './results'
}

docker {
    enabled = true
}
|
40
tools/umi_tools/test/verify-checksum.sh
Executable file
40
tools/umi_tools/test/verify-checksum.sh
Executable file
|
@ -0,0 +1,40 @@
|
|||
#!/bin/sh
# Compare two sets of files by content hash.
#
# Usage: verify-checksum.sh <check pattern> <verify pattern>
#
# Each argument is a file name or glob pattern. The md5 of every matching file
# is computed, the list of hashes is hashed again, and the two digests are
# compared. Exits 0 when they match; otherwise prints a message on stderr and
# exits 1.

if [ -z "$1" ]
then
    echo "No check pattern argument supplied" >&2
    exit 1
fi

if [ -z "$2" ]
then
    echo "No verify pattern argument supplied" >&2
    exit 1
fi

checkfiles=$1
infiles=$2
#echo $checkfiles
#echo $infiles

# $checkfiles and $infiles are deliberately left unquoted below so that a glob
# pattern passed as a single argument is expanded here.

# printf is used for the headings because echo's handling of '\n' differs
# between shells.
printf '\nCalculating check file hashes...\n'
md5sum $checkfiles

printf '\nCalculating input file hashes...\n'
md5sum $infiles

printf '\nComparing hash of file of hashes...\n'
checkver=$(md5sum $checkfiles | awk '{print $1}' | md5sum | awk '{print $1}')
echo "$checkver"

inver=$(md5sum $infiles | awk '{print $1}' | md5sum | awk '{print $1}')
echo "$inver"

# POSIX test compares strings with a single '='; '==' is a bashism that fails
# under strict /bin/sh implementations such as dash.
if [ "$checkver" = "$inver" ]
then
    echo "Hashes match"
    exit 0
else
    echo "Hashes do not match" >&2
    exit 1
fi
|
46
tools/umi_tools/umi_tools.yml
Normal file
46
tools/umi_tools/umi_tools.yml
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Module metadata for umi_tools.
# NOTE(review): the nesting below (in particular `processes` as a top-level
# key) was reconstructed from a listing whose indentation was lost - confirm
# against the module metadata schema.
name: umi_tools
# Quoted so the version stays the string "1.0" instead of the float 1.0.
version: "1.0"
description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
keywords:
  - UMI
  - RMT
  - Barcode
tools:
  - umi_tools:
      description: |
        Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.
      homepage: https://github.com/CGATOxford/UMI-tools
      documentation: https://umi-tools.readthedocs.io/en/latest/
processes:
  - dedup:
      operation: |
        Set command args to params.umitools_dedup_args
        The program will execute with the following pattern:
        umi_tools dedup --log={SAMPLE_ID}.dedup.log {params.umitools_dedup_args} -I {SAMPLE_ID}.bam -S {SAMPLE_ID}.dedup.bam --output-stats={SAMPLE_ID}
      description: |
        Groups PCR duplicates and de-duplicates reads to yield one read per group.
        Use this when you want to remove the PCR duplicates prior to any downstream analysis.
      input:
        - sample_id:
            type: string
            description: Sample identifier
        - bam:
            type: file array
            description: BAM sequence file and associated BAI index file
      output:
        # Patterns starting with `*` are quoted; unquoted, YAML reads them as aliases.
        - dedupBam:
            type: tuple
            description: A tuple of samples id and output bam file
            pattern: [sample_id, "*SAMPLE_ID.dedup.bam"]
        # Named dedupBai to match the `emit: dedupBai` channel in main.nf.
        - dedupBai:
            type: tuple
            description: A tuple of samples id and output bai file
            pattern: [sample_id, "*SAMPLE_ID.dedup.bam.bai"]
        - report:
            type: file
            description: Log file for the umi_tools operation
            pattern: "*SAMPLE_ID.dedup.log"
# `@` is a reserved indicator and cannot start a plain scalar, so handles are quoted.
authors:
  - "@candiceh08"
  - "@chris-cheshire"
|
||||
|
Loading…
Reference in a new issue