* feat(dragmap): Add initial hashtable module

* feat(dragmap): Add initial align module

* test(dragmap): Remove md5sum

Forgot sam files have a header. Might pipe this through samtools.

* build(dragmap): Add mulled container

* chore(dragmap): Update prefix

* feat(dragmap): Output a bam file

* feat(dragmap): Add log files

* Update modules/dragmap/align/meta.yml

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
This commit is contained in:
Edmund Miller 2021-12-06 08:56:41 +00:00 committed by GitHub
parent cd94731789
commit f3ffa69b8d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 266 additions and 0 deletions

View file

@ -0,0 +1,59 @@
// Aligns single- or paired-end FASTQ reads with `dragen-os` and converts the
// alignment stream to BAM with `samtools view`.
//
// Inputs:
//   meta    - Groovy map of sample information; `meta.id` is the default output
//             prefix and `meta.single_end` selects the read layout
//   reads   - one FASTQ file (single-end) or two FASTQ files (paired-end)
//   hashmap - path handed to `dragen-os -r`
// Outputs:
//   bam      - `${prefix}.bam` written by `samtools view`
//   log      - `${prefix}.dragmap.log`, the captured stderr of `dragen-os`
//   versions - `versions.yml` listing the dragmap, samtools and pigz versions
process DRAGMAP_ALIGN {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::dragmap=1.2.1 bioconda::samtools=1.14 conda-forge::pigz=2.3.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:f7aad9060cde739c95685fc5ff6d6f7e3ec629c8-0':
        'quay.io/biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:f7aad9060cde739c95685fc5ff6d6f7e3ec629c8-0' }"

    input:
    tuple val(meta), path(reads)
    path  hashmap

    output:
    tuple val(meta), path("*.bam"), emit: bam
    tuple val(meta), path('*.log'), emit: log
    path "versions.yml"           , emit: versions

    script:
    // args  -> extra options for dragen-os; args2 -> extra options for samtools view
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The read arguments are the only part of the command that depends on the
    // layout, so build them once instead of keeping two copies of the script.
    def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
    """
    dragen-os \\
        -r $hashmap \\
        $reads_command \\
        --num-threads $task.cpus \\
        $args \\
        2> ${prefix}.dragmap.log \\
        | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam -

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        dragmap: \$(echo \$(dragen-os --version 2>&1))
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,42 @@
# Module metadata for DRAGMAP_ALIGN: describes the tool plus every input and
# output channel declared by the process.
name: dragmap_align
description: Performs fastq alignment to a reference using DRAGMAP
keywords:
  - alignment
  - map
  - fastq
  - bam
  - sam
tools:
  - dragmap:
      description: Dragmap is the Dragen mapper/aligner Open Source Software.
      homepage: https://github.com/Illumina/dragmap
      documentation: https://github.com/Illumina/dragmap
      tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage
      doi: ""
      licence: ['GPL v3']
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  # The hash table is passed as the directory produced by the dragmap/hashtable module.
  - hashmap:
      type: directory
      description: Directory containing the DRAGMAP hash table
      pattern: "*.{cmp,bin,txt}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Output BAM file containing read alignments
      pattern: "*.bam"
  # Standard error of dragen-os, redirected to <prefix>.dragmap.log by the process.
  - log:
      type: file
      description: Log file containing the stderr output of dragen-os
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@Emiller88"

View file

@ -0,0 +1,33 @@
// Builds a DRAGMAP hash table from a reference FASTA by running
// `dragen-os --build-hash-table true`.
//
// Input:
//   fasta - reference FASTA handed to `--ht-reference`
// Outputs:
//   hashmap  - the `dragmap` directory given to `--output-directory`
//   versions - `versions.yml` recording the dragen-os version
process DRAGMAP_HASHTABLE {
tag "$fasta"
label 'process_high'
conda (params.enable_conda ? "bioconda::dragmap=1.2.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/dragmap:1.2.1--hd4ca14e_0':
'quay.io/biocontainers/dragmap:1.2.1--hd4ca14e_0' }"
input:
path fasta
output:
path "dragmap" , emit: hashmap
path "versions.yml" , emit: versions
script:
// Extra dragen-os options taken from `task.ext.args`; empty string when unset.
def args = task.ext.args ?: ''
// The output directory is created up front, then dragen-os is pointed at it.
"""
mkdir dragmap
dragen-os \\
--build-hash-table true \\
--ht-reference $fasta \\
--output-directory dragmap \\
$args \\
--ht-num-threads $task.cpus
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dragmap: \$(echo \$(dragen-os --version 2>&1))
END_VERSIONS
"""
}

View file

@ -0,0 +1,30 @@
# Module metadata for DRAGMAP_HASHTABLE: describes the tool plus the input and
# output channels declared by the process.
name: dragmap_hashtable
description: Create DRAGEN hashtable for reference genome
keywords:
  - index
  - fasta
  - genome
  - reference
tools:
  - dragmap:
      description: Dragmap is the Dragen mapper/aligner Open Source Software.
      homepage: https://github.com/Illumina/dragmap
      documentation: https://github.com/Illumina/dragmap
      tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage
      doi: ""
      licence: ['GPL v3']
input:
  - fasta:
      type: file
      description: Input genome fasta file
output:
  # The process emits the whole `dragmap` output directory, not individual files.
  - hashmap:
      type: directory
      description: Directory containing the DRAGMAP hash table
      pattern: "*.{cmp,bin,txt}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@Emiller88"

View file

@ -402,6 +402,14 @@ diamond/makedb:
- modules/diamond/makedb/**
- tests/modules/diamond/makedb/**
# Path filters for the dragmap/align module and its tests.
dragmap/align:
- modules/dragmap/align/**
- tests/modules/dragmap/align/**
# Path filters for the dragmap/hashtable module and its tests.
dragmap/hashtable:
- modules/dragmap/hashtable/**
- tests/modules/dragmap/hashtable/**
dragonflye:
- modules/dragonflye/**
- tests/modules/dragonflye/**

View file

@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { DRAGMAP_HASHTABLE } from '../../../../modules/dragmap/hashtable/main.nf'
include { DRAGMAP_ALIGN } from '../../../../modules/dragmap/align/main.nf'
// Single-end test: builds the hash table from the SARS-CoV-2 genome FASTA and
// aligns one FASTQ file against it.
workflow test_dragmap_align_single_end {

    sample_meta = [ id:'test', single_end:true ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
    // Channel element expected by DRAGMAP_ALIGN: [ meta map, list of reads ]
    reads_input = [ sample_meta, fastq_files ]

    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    DRAGMAP_HASHTABLE ( genome_fasta )
    DRAGMAP_ALIGN ( reads_input, DRAGMAP_HASHTABLE.out.hashmap )
}
// Paired-end test: builds the hash table from the SARS-CoV-2 genome FASTA and
// aligns a pair of FASTQ files against it.
workflow test_dragmap_align_paired_end {

    sample_meta = [ id:'test', single_end:false ]
    fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    // Channel element expected by DRAGMAP_ALIGN: [ meta map, list of reads ]
    reads_input = [ sample_meta, fastq_files ]

    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    DRAGMAP_HASHTABLE ( genome_fasta )
    DRAGMAP_ALIGN ( reads_input, DRAGMAP_HASHTABLE.out.hashmap )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish under <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first underscore of the last ':'-separated part of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }
}

View file

@ -0,0 +1,17 @@
# No md5sum is pinned for the BAM or log outputs; only their paths are listed.
- name: dragmap align single-end
  command: >-
    nextflow run ./tests/modules/dragmap/align
    -entry test_dragmap_align_single_end
    -c ./tests/config/nextflow.config
    -c ./tests/modules/dragmap/align/nextflow.config
  tags:
    - dragmap
    - dragmap/align
  files:
    - path: output/dragmap/test.bam
    - path: output/dragmap/test.dragmap.log

- name: dragmap align paired-end
  command: >-
    nextflow run ./tests/modules/dragmap/align
    -entry test_dragmap_align_paired_end
    -c ./tests/config/nextflow.config
    -c ./tests/modules/dragmap/align/nextflow.config
  tags:
    - dragmap
    - dragmap/align
  files:
    - path: output/dragmap/test.bam
    - path: output/dragmap/test.dragmap.log

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { DRAGMAP_HASHTABLE } from '../../../../modules/dragmap/hashtable/main.nf'
// Builds a DRAGMAP hash table from the SARS-CoV-2 genome FASTA test file.
workflow test_dragmap_hashtable {

    genome_fasta_path = params.test_data['sarscov2']['genome']['genome_fasta']
    genome_fasta      = file(genome_fasta_path, checkIfExists: true)

    DRAGMAP_HASHTABLE ( genome_fasta )
}
// TODO Add test using alt-masked bed file
// https://github.com/Illumina/dragmap#build-hash-table-using-an-alt-masked-bed-file

View file

@ -0,0 +1,5 @@
process {
    // Publish under <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first underscore of the last ':'-separated part of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }
}

View file

@ -0,0 +1,19 @@
# Files listed without an md5sum have no checksum pinned; the rest are
# compared against the recorded checksum.
- name: dragmap hashtable
  command: >-
    nextflow run ./tests/modules/dragmap/hashtable
    -entry test_dragmap_hashtable
    -c ./tests/config/nextflow.config
    -c ./tests/modules/dragmap/hashtable/nextflow.config
  tags:
    - dragmap
    - dragmap/hashtable
  files:
    - path: output/dragmap/dragmap/hash_table.cfg
    - path: output/dragmap/dragmap/hash_table.cfg.bin
    - path: output/dragmap/dragmap/hash_table.cmp
      md5sum: "bc210e5358fd65656f9aea297b59ec7d"
    - path: output/dragmap/dragmap/hash_table_stats.txt
    - path: output/dragmap/dragmap/reference.bin
      md5sum: "b6b5c12a42416b990cd2844de8f33c5d"
    - path: output/dragmap/dragmap/ref_index.bin
      md5sum: "8470be9566ecee77eb4aea6a38922a66"
    - path: output/dragmap/dragmap/repeat_mask.bin
      md5sum: "2439259a2fd32a1d0f4c53d585f3da3a"
    - path: output/dragmap/dragmap/str_table.bin
      md5sum: "302e2b30993973527e69c6bcd1f093d0"