Fgbio/fastqtobam (#1980)

* fix samtools stats thread usage

* fix samtools stats thread usage (#1975)

* fix samtools stats thread usage

* fix checksums

* Fix/samtools flagstat (#1979)

* fix samtools stats thread usage

* flagstat: fix threads

* Update busco to v5.4.3 and tar small files (#1970)

* Update busco to v5.4.3 and tar small files

* Fix file contain strings

* Swap md5sums to contains for variable files

* Simplify and generalize module

* add test.yml

* Update modules/fgbio/fastqtobam/meta.yml

Co-authored-by: Moritz E. Beber <midnighter@posteo.net>

* add support + tests for cram output

* more tweaks

* update test.yml

* prettier

* try to fix tests

* fix tests

* prettier

* drop checksum for cram

Co-authored-by: Mahesh Binzer-Panchal <mahesh.binzer-panchal@nbis.se>
Co-authored-by: Moritz E. Beber <midnighter@posteo.net>
This commit is contained in:
Matthias De Smet 2022-09-05 11:57:55 +02:00 committed by GitHub
parent 09acfad211
commit 36049bf544
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 146 additions and 29 deletions

View file

@ -9,11 +9,11 @@ process FGBIO_FASTQTOBAM {
input:
tuple val(meta), path(reads)
val read_structure
output:
tuple val(meta), path("*_umi_converted.bam"), emit: umibam
path "versions.yml" , emit: versions
tuple val(meta), path("*.bam") , emit: bam , optional: true
tuple val(meta), path("*.cram"), emit: cram, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -21,17 +21,19 @@ process FGBIO_FASTQTOBAM {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def sample_name = args.contains("--sample") ? "" : "--sample ${prefix}"
def library_name = args.contains("--library") ? "" : "--library ${prefix}"
def output = prefix =~ /\.(bam|cram)$/ ? prefix : "${prefix}.bam"
"""
fgbio \\
--tmp-dir=. \\
FastqToBam \\
-i $reads \\
-o "${prefix}_umi_converted.bam" \\
--read-structures $read_structure \\
--sample $meta.id \\
--library $meta.id \\
$args
${args} \\
--input ${reads} \\
--output ${output} \\
${sample_name} \\
${library_name}
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -1,6 +1,6 @@
name: fgbio_fastqtobam
description: |
Using the FGBIO tools, converts FASTQ files sequenced with UMIs into BAM files, moving the UMI barcode into the RX field of the BAM file
Using the fgbio tools, converts FASTQ files into unaligned BAM or CRAM files, optionally moving the UMI barcode into the RX field of the reads
keywords:
- fastqtobam
- fgbio
@ -19,15 +19,6 @@ input:
description: pair of reads to be converted into BAM file
pattern: "*.{fastq.gz}"
- read_structure:
type: string
description: |
A read structure should always be provided for each of the fastq files.
If single end, the string will contain only one structure (i.e. "2M11S+T"), if paired-end the string
will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T").
If the read does not contain any UMI, the structure will be +T (i.e. only template of any length).
https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures
output:
- meta:
type: map
@ -38,10 +29,15 @@ output:
type: file
description: File containing software version
pattern: "*.{version.yml}"
- umibam:
- bam:
type: file
description: Converted, unsorted BAM file with RX tag reporting UMI sequence (if any)
description: Unaligned, unsorted BAM file
pattern: "*.{bam}"
- cram:
type: file
description: Unaligned, unsorted CRAM file
pattern: "*.{cram}"
authors:
- "@lescai"
- "@matthdsm"

View file

@ -4,7 +4,7 @@ nextflow.enable.dsl = 2
include { FGBIO_FASTQTOBAM } from '../../../../modules/fgbio/fastqtobam/main.nf'
workflow test_fgbio_fastqtobam {
workflow test_fgbio_fastqtobam_paired_default {
input = [
[ id:'test', single_end:false ], // meta map
@ -13,7 +13,71 @@ workflow test_fgbio_fastqtobam {
file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
]
]
read_structure = "+T 12M11S+T"
FGBIO_FASTQTOBAM ( input, read_structure )
FGBIO_FASTQTOBAM ( input )
}
// Paired-end conversion. The output file name for this entry workflow is
// controlled by the ext.prefix override keyed on
// "test_fgbio_fastqtobam_paired_cram:FGBIO_FASTQTOBAM" in the test config.
workflow test_fgbio_fastqtobam_paired_cram {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // R1 / R2 FASTQ files taken from the shared test-data map
    fastq_pair = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    FGBIO_FASTQTOBAM ( [ sample_meta, fastq_pair ] )
}
// Paired-end conversion. The output file name for this entry workflow is
// controlled by the ext.prefix override keyed on
// "test_fgbio_fastqtobam_paired_bam:FGBIO_FASTQTOBAM" in the test config.
workflow test_fgbio_fastqtobam_paired_bam {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // R1 / R2 FASTQ files taken from the shared test-data map
    fastq_pair = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    FGBIO_FASTQTOBAM ( [ sample_meta, fastq_pair ] )
}
// Single-end case: only one FASTQ file is passed to the module, so the meta
// map is flagged single_end:true to describe the input truthfully (the
// paired-end workflows in this file use single_end:false with two files).
workflow test_fgbio_fastqtobam_single {

    input = [
        [ id:'test', single_end:true ], // meta map
        [
            file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true)
        ]
    ]

    FGBIO_FASTQTOBAM ( input )
}
// Paired-end conversion. Extra command-line arguments for this entry workflow
// come from the ext.args override keyed on
// "test_fgbio_fastqtobam_paired_umi:FGBIO_FASTQTOBAM" in the test config.
workflow test_fgbio_fastqtobam_paired_umi {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // R1 / R2 FASTQ files taken from the shared test-data map
    fastq_pair = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    FGBIO_FASTQTOBAM ( [ sample_meta, fastq_pair ] )
}
// Paired-end conversion. Extra command-line arguments for this entry workflow
// come from the ext.args override keyed on
// "test_fgbio_fastqtobam_paired_custom_samplename:FGBIO_FASTQTOBAM" in the
// test config.
workflow test_fgbio_fastqtobam_paired_custom_samplename {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // R1 / R2 FASTQ files taken from the shared test-data map
    fastq_pair = [
        file(params.test_data['homo_sapiens']['illumina']['test_umi_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_umi_2_fastq_gz'], checkIfExists: true)
    ]

    FGBIO_FASTQTOBAM ( [ sample_meta, fastq_pair ] )
}

View file

@ -1,5 +1,16 @@
// Test-only process configuration for the fgbio/fastqtobam module tests.
process {
// Publish under <outdir>/<tool>: the last ':'-separated element of the process
// name is split on '_' and its first token is lower-cased
// (e.g. FGBIO_FASTQTOBAM -> "fgbio").
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Prefix ending in ".cram" for the paired_cram entry workflow.
withName: "test_fgbio_fastqtobam_paired_cram:FGBIO_FASTQTOBAM" {
ext.prefix = {"${meta.id}.cram"}
}
// Prefix ending in ".bam" for the paired_bam entry workflow.
withName: "test_fgbio_fastqtobam_paired_bam:FGBIO_FASTQTOBAM" {
ext.prefix = {"${meta.id}.bam"}
}
// Supplies read structures for the two reads of the paired_umi entry workflow.
withName: "test_fgbio_fastqtobam_paired_umi:FGBIO_FASTQTOBAM" {
ext.args = "--read-structures +T 12M11S+T"
}
// Supplies explicit --sample / --library values for the custom_samplename
// entry workflow.
withName: "test_fgbio_fastqtobam_paired_custom_samplename:FGBIO_FASTQTOBAM" {
ext.args = "--sample CustomSample --library CustomLibrary"
}
}

View file

@ -1,8 +1,52 @@
- name: fgbio fastqtobam test_fgbio_fastqtobam
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
- name: fgbio fastqtobam test_fgbio_fastqtobam_paired_default
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_paired_default -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio/fastqtobam
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test_umi_converted.bam
- path: output/fgbio/test.bam
md5sum: 83cd60e01e5ab90ea3729431ce401add
# Each entry runs one entry workflow of the test pipeline (-entry <workflow>)
# with the shared and module-specific configs, then checks the listed files.

# CRAM output: only the existence of the file is checked (no md5sum listed).
- name: fgbio fastqtobam test_fgbio_fastqtobam_paired_cram
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_paired_cram -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test.cram
# BAM output for the paired_bam entry workflow, verified by md5sum.
- name: fgbio fastqtobam test_fgbio_fastqtobam_paired_bam
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_paired_bam -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test.bam
md5sum: 83f879e3a302ca8a50dc9675be7a9216
# BAM output for the single entry workflow, verified by md5sum.
- name: fgbio fastqtobam test_fgbio_fastqtobam_single
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_single -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test.bam
md5sum: c08fe1b035b972808f05128e37e8e669
# BAM output for the paired_umi entry workflow, verified by md5sum.
- name: fgbio fastqtobam test_fgbio_fastqtobam_paired_umi
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_paired_umi -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test.bam
md5sum: 9510735554e5eff29244077a72075fb6
# BAM output for the paired_custom_samplename entry workflow, verified by md5sum.
- name: fgbio fastqtobam test_fgbio_fastqtobam_paired_custom_samplename
command: nextflow run ./tests/modules/fgbio/fastqtobam -entry test_fgbio_fastqtobam_paired_custom_samplename -c ./tests/config/nextflow.config -c ./tests/modules/fgbio/fastqtobam/nextflow.config
tags:
- fgbio
- fgbio/fastqtobam
files:
- path: output/fgbio/test.bam
md5sum: ab8d7d2d9c1559b19a501d120296ebe0