mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-10 20:23:10 +00:00
Added UMI sub-workflow (#1098)
* added code for subworkflow fgbio call umi consensus * ironing out a few typos etc * fixing last things * fixed md5sum - lets see if it changes * removing file accidentally deleted * tidy indents * added bwamem2 alternative * fixed entry for both tests * changed name second test workflow entry * fixed workflow entry names * fixed md5sum for file generated with bwamem2 * added syntax new DSL2 * added new config location in test command line * added new config location in test command line * use of prefix instead of suffix because modules have been changed in this way * explicit alias to bwa mem1 to avoid confusion * removed param that should be an ext optional argument in fgbio groupreadsbyumi * missing colon in config * missing colon in module config too * order list alphabetically Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * remove params from body Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * improving readability of input structure Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se> * reverting to mandatory input * fixed tests and workflow take values * remove param Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * simplify tests params Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> * formatting inputs for readability * factoring in changes to bwamem2_mem and bwa_mem sort/view inputs * updating test md5sum for grouped file following code update in bwamem Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se> Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com> Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>
This commit is contained in:
parent
37c5cb495d
commit
a68c563e54
6 changed files with 240 additions and 2 deletions
86
subworkflows/nf-core/fgbio_create_umi_consensus/main.nf
Normal file
86
subworkflows/nf-core/fgbio_create_umi_consensus/main.nf
Normal file
|
@ -0,0 +1,86 @@
|
|||
//
|
||||
// Runs FGBIO tools to remove UMI tags from FASTQ reads
|
||||
// Convert them to unmapped BAM file, map them to the reference genome,
|
||||
// use the mapped information to group UMIs and generate consensus reads
|
||||
//
|
||||
|
||||
|
||||
include { BWAMEM2_INDEX } from '../../../modules/bwamem2/index/main.nf'
|
||||
include { BWAMEM2_MEM } from '../../../modules/bwamem2/mem/main'
|
||||
include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/bwa/index/main.nf'
|
||||
include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/bwa/mem/main'
|
||||
include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/fgbio/callmolecularconsensusreads/main.nf'
|
||||
include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/fgbio/fastqtobam/main'
|
||||
include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/fgbio/groupreadsbyumi/main'
|
||||
include { SAMBLASTER } from '../../../modules/samblaster/main'
|
||||
include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/samtools/bam2fq/main.nf'
|
||||
|
||||
|
||||
workflow CREATE_UMI_CONSENSUS {

    take:
    reads                     // channel: [mandatory] [ val(meta), [ reads ] ]
    fasta                     // channel: [mandatory] /path/to/reference/fasta
    read_structure            // string:  [mandatory] "read_structure"
    groupreadsbyumi_strategy  // string:  [mandatory] grouping strategy - default: "Adjacency"
    aligner                   // string:  [mandatory] "bwa-mem" or "bwa-mem2"

    main:
    // Collects the version outputs of every process invoked below.
    ch_versions = Channel.empty()

    // Step 1 - FASTQ -> tagged unmapped BAM (uBAM), driven by val(read_structure).
    FASTQTOBAM ( reads, read_structure )
    ch_versions = ch_versions.mix(FASTQTOBAM.out.version)

    // Step 2 - uBAM -> FASTQ, so that the reads can be handed to BWA MEM.
    // The UMI tags have to survive in the FASTQ comment field and the output
    // must be a single interleaved FASTQ file, hence no splitting.
    split_fastq = false
    BAM2FASTQ ( FASTQTOBAM.out.umibam, split_fastq )
    ch_versions = ch_versions.mix(BAM2FASTQ.out.versions)

    // Step 3 - index the reference and map the interleaved reads.
    // "bwa-mem" selects BWA MEM; every other value of `aligner` runs BWA-MEM2.
    ch_aligned_bam = Channel.empty()

    if (aligner == "bwa-mem") {
        BWAMEM1_INDEX ( fasta )
        BWAMEM1_MEM ( BAM2FASTQ.out.reads, BWAMEM1_INDEX.out.index, false )

        ch_versions    = ch_versions.mix(BWAMEM1_INDEX.out.versions).mix(BWAMEM1_MEM.out.versions)
        ch_aligned_bam = BWAMEM1_MEM.out.bam
    } else {
        BWAMEM2_INDEX ( fasta )
        BWAMEM2_MEM ( BAM2FASTQ.out.reads, BWAMEM2_INDEX.out.index, false )

        ch_versions    = ch_versions.mix(BWAMEM2_INDEX.out.versions).mix(BWAMEM2_MEM.out.versions)
        ch_aligned_bam = BWAMEM2_MEM.out.bam
    }

    // Step 4 - samblaster adds the mate information to the BAM file;
    // the UMI grouping step relies on it.
    SAMBLASTER ( ch_aligned_bam )
    ch_versions = ch_versions.mix(SAMBLASTER.out.versions)

    // Step 5 - group the tagged reads by UMI with the requested strategy.
    GROUPREADSBYUMI ( SAMBLASTER.out.bam, groupreadsbyumi_strategy )
    ch_versions = ch_versions.mix(GROUPREADSBYUMI.out.versions)

    // Step 6 - call a consensus across the reads of each group;
    // this emits the consensus BAM file.
    CALLUMICONSENSUS ( GROUPREADSBYUMI.out.bam )
    ch_versions = ch_versions.mix(CALLUMICONSENSUS.out.versions)

    emit:
    ubam         = FASTQTOBAM.out.umibam       // channel: [ val(meta), [ bam ] ]
    groupbam     = GROUPREADSBYUMI.out.bam     // channel: [ val(meta), [ bam ] ]
    consensusbam = CALLUMICONSENSUS.out.bam    // channel: [ val(meta), [ bam ] ]
    versions     = ch_versions                 // channel: [ versions.yml ]
}
|
||||
|
67
subworkflows/nf-core/fgbio_create_umi_consensus/meta.yml
Normal file
67
subworkflows/nf-core/fgbio_create_umi_consensus/meta.yml
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Metadata for the CREATE_UMI_CONSENSUS subworkflow.
# `modules` and `input` mirror the include statements and the take: block of main.nf.
name: fgbio_create_umi_consensus
description: |
  This workflow uses the suite FGBIO to identify and remove UMI tags from FASTQ reads,
  convert them to unmapped BAM file, map them to the reference genome,
  and finally use the mapped information to group UMIs and generate consensus reads in each group
keywords:
  - fgbio
  - umi
  - samblaster
  - samtools
  - bwa
modules:
  - bwa/index
  - bwa/mem
  - bwamem2/index
  - bwamem2/mem
  - fgbio/fastqtobam
  - fgbio/groupreadsbyumi
  - fgbio/callmolecularconsensusreads
  - samblaster
  - samtools/bam2fq
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - reads:
      type: list
      description: list of umi-tagged reads
      pattern: "[ *.{fastq.gz,fq.gz} ]"
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
  - read_structure:
      type: string
      description: |
        A read structure should always be provided for each of the fastq files.
        If single end, the string will contain only one structure (e.g. "2M11S+T"), if paired-end the string
        will contain two structures separated by a blank space (e.g. "2M11S+T 2M11S+T").
        If the read does not contain any UMI, the structure will be +T (i.e. only template of any length).
        https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures
  - groupreadsbyumi_strategy:
      type: string
      description: |
        Required argument: defines the UMI assignment strategy.
        Must be chosen among: Identity, Edit, Adjacency, Paired.
  - aligner:
      type: string
      description: |
        Required argument: selects the aligner used to map the reads.
        Either "bwa-mem" or "bwa-mem2"; any value other than "bwa-mem" runs bwa-mem2.
output:
  - versions:
      type: file
      description: File containing software versions
      pattern: 'versions.yml'
  - ubam:
      type: file
      description: unmapped bam file
      pattern: '*.bam'
  - groupbam:
      type: file
      description: mapped bam file, where reads are grouped by UMI tag
      pattern: '*.bam'
  - consensusbam:
      type: file
      description: |
        mapped bam file, where reads are created as consensus of those
        belonging to the same UMI group
      pattern: '*.bam'
authors:
  - '@lescai'
|
|
@ -10,7 +10,6 @@ workflow test_fgbio_groupreadsbyumi {
|
|||
[ id:'test', single_end:false ], // meta map
|
||||
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true)
|
||||
]
|
||||
strategy = "Adjacency"
|
||||
|
||||
FGBIO_GROUPREADSBYUMI ( input, strategy )
|
||||
FGBIO_GROUPREADSBYUMI ( input, 'Adjacency' )
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { CREATE_UMI_CONSENSUS } from '../../../../subworkflows/nf-core/fgbio_create_umi_consensus/main'
|
||||
|
||||
// Runs CREATE_UMI_CONSENSUS on the paired-end UMI test reads with the "bwa-mem" aligner.
workflow test_fgbio_create_umi_consensus_mem1 {

    // both mates of the UMI test sample, looked up in params.test_data
    umi_fastqs = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    // [ meta map, [ reads ] ] - the shape expected by the subworkflow's `reads` input
    sample = [ [ id:'test', single_end:false ], umi_fastqs ]

    reference = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // one read structure per mate, separated by a blank
    structure = "+T 12M11S+T"
    strategy  = "Adjacency"
    mapper    = "bwa-mem"

    CREATE_UMI_CONSENSUS( sample, reference, structure, strategy, mapper )
}
|
||||
|
||||
// Runs CREATE_UMI_CONSENSUS on the paired-end UMI test reads with the "bwa-mem2" aligner.
workflow test_fgbio_create_umi_consensus_mem2 {

    // both mates of the UMI test sample, looked up in params.test_data
    umi_fastqs = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    // [ meta map, [ reads ] ] - the shape expected by the subworkflow's `reads` input
    sample = [ [ id:'test', single_end:false ], umi_fastqs ]

    reference = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // one read structure per mate, separated by a blank
    structure = "+T 12M11S+T"
    strategy  = "Adjacency"
    mapper    = "bwa-mem2"

    CREATE_UMI_CONSENSUS( sample, reference, structure, strategy, mapper )
}
|
|
@ -0,0 +1,31 @@
|
|||
// Process-level settings for the fgbio_create_umi_consensus subworkflow test.
process {

    // Publish into a directory named after the process: take the last
    // ':'-separated component of task.process, keep the part before the
    // first '_' and lower-case it.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Keep the RX tag when converting the uBAM back to FASTQ
    // (see the samtools fastq `-T` option).
    withName: SAMTOOLS_BAM2FQ {
        ext.args = '-T RX'
    }

    // Both aligners receive the same options: interleaved input (-p),
    // FASTQ comment appended to the alignment records (-C), plus -M.
    withName: BWA_MEM {
        ext.args = '-p -C -M'
    }

    withName: BWAMEM2_MEM {
        ext.args = '-p -C -M'
    }

    withName: FGBIO_CALLMOLECULARCONSENSUSREADS {
        ext.args   = '-M 1 -S Coordinate'
        ext.prefix = { "${meta.id}_umiconsensus" }
    }

    withName: SAMBLASTER {
        ext.args   = '-M --addMateTags'
        ext.prefix = { "${meta.id}_processed" }
    }

}
|
|
@ -0,0 +1,22 @@
|
|||
# Test specification for the fgbio_create_umi_consensus subworkflow: one entry per aligner.
# The uBAM and consensus BAM checksums are the same in both entries; only the
# grouped BAM checksum differs between the two aligners.

- name: fgbio_create_umi_consensus_bwamem1
  command: >-
    nextflow run ./tests/subworkflows/nf-core/fgbio_create_umi_consensus
    -entry test_fgbio_create_umi_consensus_mem1
    -c ./tests/config/nextflow.config
    -c ./tests/subworkflows/nf-core/fgbio_create_umi_consensus/nextflow.config
  tags: [subworkflows/fgbio_create_umi_consensus]
  files:
    - { path: ./output/fastqtobam/test_umi_converted.bam, md5sum: 9510735554e5eff29244077a72075fb6 }
    - { path: ./output/groupreadsbyumi/test_umi-grouped.bam, md5sum: 44f31da850d5a8100b43b629426f2e17 }
    - { path: ./output/callumiconsensus/test_umiconsensus.bam, md5sum: 24b48e3543de0ae7e8a95c116d5ca6a6 }

- name: fgbio_create_umi_consensus_bwamem2
  command: >-
    nextflow run ./tests/subworkflows/nf-core/fgbio_create_umi_consensus
    -entry test_fgbio_create_umi_consensus_mem2
    -c ./tests/config/nextflow.config
    -c ./tests/subworkflows/nf-core/fgbio_create_umi_consensus/nextflow.config
  # NOTE(review): this tag carries a "_bwamem2" suffix while the bwamem1 entry
  # uses the bare subworkflow name - confirm which tag the test selector expects.
  tags: [subworkflows/fgbio_create_umi_consensus_bwamem2]
  files:
    - { path: ./output/fastqtobam/test_umi_converted.bam, md5sum: 9510735554e5eff29244077a72075fb6 }
    - { path: ./output/groupreadsbyumi/test_umi-grouped.bam, md5sum: c69333155038b9a968fd096627d4dfb0 }
    - { path: ./output/callumiconsensus/test_umiconsensus.bam, md5sum: 24b48e3543de0ae7e8a95c116d5ca6a6 }
|
Loading…
Reference in a new issue