New module - BBMap clumpify (#1978)

* Update meta.yml by fixing the expected html pattern

Fixed html pattern typo:

Before: pattern: "*.thml"

After fix: pattern: "*.html"

* main.nf for bbmap_clumpify

* meta.yml for bbmap_clumpify

* bbmap/clumpify via Gitpod and @mahesh-panchal's guide

* small changes to tests

* explicit mention of output file pattern - *.clumped.fastq.gz

* Update modules/bbmap/clumpify/main.nf

Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>

* Update modules/bbmap/clumpify/main.nf

Accepted @mahesh-panchal suggestions

Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>

Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>
Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
This commit is contained in:
tamuanand 2022-09-05 02:23:08 -04:00 committed by GitHub
parent f881eb7950
commit 57b43ed3de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 138 additions and 0 deletions

View file

@ -0,0 +1,38 @@
//
// BBMAP_CLUMPIFY: run BBMap's clumpify.sh on the FASTQ file(s) of one sample.
//
// Input:
//   meta  - Groovy map describing the sample; `meta.id` and `meta.single_end` are read here
//   reads - one FASTQ file when `meta.single_end` is true, otherwise two (R1, R2)
//
// Output:
//   reads    - the `*.clumped.fastq.gz` file(s) written by clumpify.sh
//   log      - everything clumpify.sh printed to stdout/stderr
//   versions - versions.yml holding the output of bbversion.sh
//
process BBMAP_CLUMPIFY {
    tag "$meta.id"
    label 'process_single'
    label 'process_high_memory'

    conda (params.enable_conda ? "bioconda::bbmap=38.98" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:38.98--h5c4e2a8_1' :
        'quay.io/biocontainers/bbmap:38.98--h5c4e2a8_1' }"

    input:
    tuple val(meta), path(reads)

    output:
    // Only collect the files this script names itself (`<prefix>[_1|_2].clumped.fastq.gz`),
    // which is also the pattern documented in the module's meta.yml.
    tuple val(meta), path('*.clumped.fastq.gz'), emit: reads
    tuple val(meta), path('*.log')             , emit: log
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end data uses in=/out=; paired-end data uses in1=/in2= and out1=/out2=.
    def raw = meta.single_end ? "in=$reads" : "in1=${reads[0]} in2=${reads[1]}"
    def clumped = meta.single_end ? "out=${prefix}.clumped.fastq.gz" : "out1=${prefix}_1.clumped.fastq.gz out2=${prefix}_2.clumped.fastq.gz"
    // `&>` sends both stdout and stderr of clumpify.sh to the log file.
    """
    clumpify.sh \\
        $raw \\
        $clumped \\
        $args \\
        &> ${prefix}.clumpify.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,49 @@
# Metadata describing the bbmap_clumpify module: the wrapped tool,
# the inputs it expects and the outputs it emits.
name: bbmap_clumpify
description: Create 30% Smaller, Faster Gzipped Fastq Files. And remove duplicates
keywords:
  - clumping fastqs
  - smaller fastqs
  - deduping
  - fastq

# Software wrapped by this module.
tools:
  - bbmap:
      description: BBMap is a short read aligner, as well as various other bioinformatic tools.
      homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/
      documentation: https://www.biostars.org/p/225338/
      # NOTE(review): this is the plain string "None", not a YAML null - confirm that is intended.
      tool_dev_url: None
      doi: ""
      licence: ["UC-LBL license (see package)"]

# Input channel elements.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.

# Output channel elements.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: The reordered/clumped (and if necessary deduped) fastq reads
      pattern: "*.clumped.fastq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - log:
      type: file
      description: Clumpify log file
      pattern: "*clumpify.log"

authors:
  - "@tamuanand"

View file

@ -146,6 +146,10 @@ bbmap/bbsplit:
- modules/bbmap/bbsplit/**
- tests/modules/bbmap/bbsplit/**
# Path globs for the bbmap/clumpify tag: the module's sources and its tests.
# NOTE(review): presumably used to decide which module tests to run when these paths change - confirm.
bbmap/clumpify:
- modules/bbmap/clumpify/**
- tests/modules/bbmap/clumpify/**
bbmap/index:
- modules/bbmap/index/**
- tests/modules/bbmap/index/**

View file

@ -0,0 +1,24 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BBMAP_CLUMPIFY } from '../../../../modules/bbmap/clumpify/main.nf'
//
// Single-end case: one FASTQ file (test_data sarscov2 / illumina / test_1_fastq_gz)
// tagged with single_end:true is handed to BBMAP_CLUMPIFY.
//
workflow test_bbmap_clumpify_single_end {

    def sample_meta = [ id:'test', single_end:true ]
    def fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    // The process takes a [ meta, reads ] tuple.
    BBMAP_CLUMPIFY ( [ sample_meta, fastq_files ] )
}
//
// Paired-end case: two FASTQ files (test_data sarscov2 / illumina /
// test_1_fastq_gz and test_2_fastq_gz) tagged with single_end:false
// are handed to BBMAP_CLUMPIFY.
//
workflow test_bbmap_clumpify_paired_end {

    def sample_meta = [ id:'test', single_end:false ]
    def fastq_files = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    // The process takes a [ meta, reads ] tuple.
    BBMAP_CLUMPIFY ( [ sample_meta, fastq_files ] )
}

View file

@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<tool>, where <tool> is derived
// from the process name: the last ':'-separated segment, cut at the first '_',
// lower-cased (e.g. a process named BBMAP_CLUMPIFY publishes to <outdir>/bbmap).
process {
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,18 @@
# Single-end run: expects one clumped FASTQ plus the clumpify log.
- name: bbmap clumpify test_bbmap_clumpify_single_end
  command: nextflow run ./tests/modules/bbmap/clumpify -entry test_bbmap_clumpify_single_end -c ./tests/config/nextflow.config -c ./tests/modules/bbmap/clumpify/nextflow.config
  tags:
    - bbmap/clumpify
    - bbmap
  # Files that must exist after the run.
  files:
    - path: output/bbmap/test.clumped.fastq.gz
    - path: output/bbmap/test.clumpify.log
# Paired-end run: expects the clumpify log plus one clumped FASTQ per mate.
- name: bbmap clumpify test_bbmap_clumpify_paired_end
  command: nextflow run ./tests/modules/bbmap/clumpify -entry test_bbmap_clumpify_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/bbmap/clumpify/nextflow.config
  tags:
    - bbmap/clumpify
    - bbmap
  # Files that must exist after the run.
  files:
    - path: output/bbmap/test.clumpify.log
    - path: output/bbmap/test_1.clumped.fastq.gz
    - path: output/bbmap/test_2.clumped.fastq.gz