From 84f02a08a5d6d881cb18c31fb15faca4e735a1f2 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 23 May 2022 22:31:09 +0100 Subject: [PATCH 1/2] Update umitools/dedup module to make --output-stats optional --- modules/umitools/dedup/main.nf | 10 +- modules/umitools/dedup/meta.yml | 4 + tests/modules/umitools/dedup/main.nf | 77 ++++++++----- tests/modules/umitools/dedup/nextflow.config | 6 +- tests/modules/umitools/dedup/test.yml | 107 +++++++++++++------ 5 files changed, 135 insertions(+), 69 deletions(-) diff --git a/modules/umitools/dedup/main.nf b/modules/umitools/dedup/main.nf index dfcbcf2f..07e6061d 100644 --- a/modules/umitools/dedup/main.nf +++ b/modules/umitools/dedup/main.nf @@ -9,12 +9,13 @@ process UMITOOLS_DEDUP { input: tuple val(meta), path(bam), path(bai) + val get_output_stats output: tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position + tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance + tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi + tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position path "versions.yml" , emit: versions when: @@ -24,12 +25,13 @@ process UMITOOLS_DEDUP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def paired = meta.single_end ? "" : "--paired" + def stats = get_output_stats ? "--output-stats $prefix" : "" """ umi_tools \\ dedup \\ -I $bam \\ -S ${prefix}.bam \\ - --output-stats $prefix \\ + $stats \\ $paired \\ $args diff --git a/modules/umitools/dedup/meta.yml b/modules/umitools/dedup/meta.yml index eee8952f..56888e5a 100644 --- a/modules/umitools/dedup/meta.yml +++ b/modules/umitools/dedup/meta.yml @@ -26,6 +26,10 @@ input: description: | BAM index files corresponding to the input BAM file. pattern: "*.{bai}" + - get_output_stats: + type: boolean + description: | + Whether or not to generate output stats. output: - meta: type: map diff --git a/tests/modules/umitools/dedup/main.nf b/tests/modules/umitools/dedup/main.nf index f89ba935..1edcb287 100644 --- a/tests/modules/umitools/dedup/main.nf +++ b/tests/modules/umitools/dedup/main.nf @@ -3,54 +3,81 @@ nextflow.enable.dsl = 2 include { UMITOOLS_EXTRACT } from '../../../../modules/umitools/extract/main.nf' -include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' -include { BWA_MEM } from '../../../../modules/bwa/mem/main.nf' +include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' +include { BWA_MEM } from '../../../../modules/bwa/mem/main.nf' include { SAMTOOLS_INDEX } from '../../../../modules/samtools/index/main.nf' -include { UMITOOLS_DEDUP } from '../../../../modules/umitools/dedup/main.nf' +include { UMITOOLS_DEDUP } from '../../../../modules/umitools/dedup/main.nf' // // Test with no UMI // workflow test_umitools_dedup_no_umi { - input = [ [ id:'test'], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] + input = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + get_output_stats = false - UMITOOLS_DEDUP ( input ) + UMITOOLS_DEDUP ( input, get_output_stats ) } // -// Test with single-end data +// Test with single-end data without --output-stats // -workflow test_umitools_dedup_single_end { - input = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - +workflow test_umitools_dedup_single_end_no_stats { + input = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + get_output_stats = false UMITOOLS_EXTRACT ( input ) BWA_INDEX ( fasta ) BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true ) - SAMTOOLS_INDEX (BWA_MEM.out.bam) - UMITOOLS_DEDUP(BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + SAMTOOLS_INDEX ( BWA_MEM.out.bam ) + UMITOOLS_DEDUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), get_output_stats ) } // -// Test with paired-end data +// Test with paired-end data without --output-stats // -workflow test_umitools_dedup_paired_end { - input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - +workflow test_umitools_dedup_paired_end_no_stats { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + get_output_stats = false UMITOOLS_EXTRACT ( input ) BWA_INDEX ( fasta ) BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true ) - SAMTOOLS_INDEX (BWA_MEM.out.bam) - UMITOOLS_DEDUP(BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + SAMTOOLS_INDEX ( BWA_MEM.out.bam ) + UMITOOLS_DEDUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), get_output_stats ) +} + +// +// Test with paired-end data with --output-stats +// +workflow test_umitools_dedup_paired_end_stats { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + get_output_stats = true + + UMITOOLS_EXTRACT ( input ) + BWA_INDEX ( fasta ) + BWA_MEM ( UMITOOLS_EXTRACT.out.reads, BWA_INDEX.out.index, true ) + SAMTOOLS_INDEX ( BWA_MEM.out.bam ) + UMITOOLS_DEDUP ( BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), get_output_stats ) } diff --git a/tests/modules/umitools/dedup/nextflow.config b/tests/modules/umitools/dedup/nextflow.config index 4a7533ef..a7bde28e 100644 --- a/tests/modules/umitools/dedup/nextflow.config +++ b/tests/modules/umitools/dedup/nextflow.config @@ -7,11 +7,7 @@ process { } withName: UMITOOLS_DEDUP { - ext.args = '' - ext.prefix = 'dedup' + ext.prefix = { "${meta.id}.dedup" } } - withName: BWA_MEM { - ext.args2 = '' - } } diff --git a/tests/modules/umitools/dedup/test.yml b/tests/modules/umitools/dedup/test.yml index 2ba9073b..ef4c2aaf 100644 --- a/tests/modules/umitools/dedup/test.yml +++ b/tests/modules/umitools/dedup/test.yml @@ -1,54 +1,91 @@ - name: umitools dedup test_umitools_dedup_no_umi - command: nextflow run tests/modules/umitools/dedup -entry test_umitools_dedup_no_umi -c tests/config/nextflow.config + command: nextflow run ./tests/modules/umitools/dedup -entry test_umitools_dedup_no_umi -c ./tests/config/nextflow.config -c ./tests/modules/umitools/dedup/nextflow.config tags: - umitools/dedup - umitools files: - - path: output/umitools/dedup.bam + - path: output/umitools/test.dedup.bam md5sum: 53b4edc399db81b87d2343e78af73cf0 - - path: output/umitools/dedup_edit_distance.tsv - md5sum: 65186b0964e2f8d970cc04d736d8b119 - - path: output/umitools/dedup_per_umi.tsv - md5sum: 8e6783a4a79437b095f095f2aefe7c01 - - path: output/umitools/dedup_per_umi_per_position.tsv - md5sum: 9386db4a104b8e4e32f3ca4a84efa4ac - - path: output/umitools/versions.yml - md5sum: 4aaaa33565bcd9a984255139933d6446 -- name: umitools dedup test_umitools_dedup_single_end - command: nextflow run tests/modules/umitools/dedup -entry test_umitools_dedup_single_end -c tests/config/nextflow.config +- name: umitools dedup test_umitools_dedup_single_end_no_stats + command: nextflow run ./tests/modules/umitools/dedup -entry test_umitools_dedup_single_end_no_stats -c ./tests/config/nextflow.config -c ./tests/modules/umitools/dedup/nextflow.config tags: - - umitools - umitools/dedup + - umitools files: + - path: output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 - path: output/bwa/test.bam - md5sum: ea41a3cdca1856b22845e1067fd31f37 - - path: output/bwa/versions.yml - md5sum: ce4d987f2c53f4c01b31d210c357b24a + md5sum: 3ecbe569cadb9b6c881917ce60779f75 - path: output/samtools/test.bam.bai md5sum: 095af0ad3921212597ffd7c342ecd5a0 - - path: output/samtools/versions.yml - md5sum: 69b7cde627c9b4e8403dfc125db71cc7 - - path: output/umitools/dedup.bam - md5sum: d95df177063432748ff33f473910cb1e - - path: output/umitools/versions.yml - md5sum: 730e768dd199d2f5bfb6fd0850446344 + - path: output/umitools/test.dedup.bam + md5sum: 8f7c519e110d6515d858eda6b16727ac + - path: output/umitools/test.umi_extract.fastq.gz + - path: output/umitools/test.umi_extract.log -- name: umitools dedup test_umitools_dedup_paired_end - command: nextflow run tests/modules/umitools/dedup -entry test_umitools_dedup_paired_end -c tests/config/nextflow.config +- name: umitools dedup test_umitools_dedup_paired_end_no_stats + command: nextflow run ./tests/modules/umitools/dedup -entry test_umitools_dedup_paired_end_no_stats -c ./tests/config/nextflow.config -c ./tests/modules/umitools/dedup/nextflow.config tags: - - umitools - umitools/dedup + - umitools files: + - path: output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 - path: output/bwa/test.bam - md5sum: 1ad786cae0ff2254c655e3a206929617 - - path: output/bwa/versions.yml - md5sum: b524c5ddf61c20f4a0a93ae8fc78b851 + md5sum: e7dcbac1825bf210409b762dbb4fec8f - path: output/samtools/test.bam.bai - md5sum: 7496f4056a8e86327ca93e350f282fc2 - - path: output/samtools/versions.yml - md5sum: 72fc2ab934fd4bca0f7f14a705530d34 - - path: output/umitools/dedup.bam - md5sum: e8d1eae2aacef76254948c5568e94555 - - path: output/umitools/versions.yml - md5sum: fd39e05042d354b3d8de49b617d3183d + md5sum: f75780d1de7860329b7fb4afeadc4bed + - path: output/umitools/test.dedup.bam + md5sum: d75284de88b05569a66667e5b9936be9 + - path: output/umitools/test.umi_extract.log + - path: output/umitools/test.umi_extract_1.fastq.gz + - path: output/umitools/test.umi_extract_2.fastq.gz + +- name: umitools dedup test_umitools_dedup_paired_end_stats + command: nextflow run ./tests/modules/umitools/dedup -entry test_umitools_dedup_paired_end_stats -c ./tests/config/nextflow.config -c ./tests/modules/umitools/dedup/nextflow.config + tags: + - umitools/dedup + - umitools + files: + - path: output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + - path: output/bwa/test.bam + md5sum: e7dcbac1825bf210409b762dbb4fec8f + - path: output/samtools/test.bam.bai + md5sum: f75780d1de7860329b7fb4afeadc4bed + - path: output/umitools/test.dedup.bam + md5sum: d75284de88b05569a66667e5b9936be9 + - path: output/umitools/test.dedup_edit_distance.tsv + md5sum: c247a49b58768e6e2e86a6c08483e612 + - path: output/umitools/test.dedup_per_umi.tsv + md5sum: 10e35ca37f2bfb521ac6dd7314951a68 + - path: output/umitools/test.dedup_per_umi_per_position.tsv + md5sum: 2e1a12e6f720510880068deddeefe063 + - path: output/umitools/test.umi_extract.log + - path: output/umitools/test.umi_extract_1.fastq.gz + - path: output/umitools/test.umi_extract_2.fastq.gz From a3e70777ad7df0aaf9bbeff11e302c01bb54346d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 23 May 2022 22:39:12 +0100 Subject: [PATCH 2/2] Fix Conda tests --- tests/modules/umitools/dedup/test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/modules/umitools/dedup/test.yml b/tests/modules/umitools/dedup/test.yml index ef4c2aaf..5a92a35a 100644 --- a/tests/modules/umitools/dedup/test.yml +++ b/tests/modules/umitools/dedup/test.yml @@ -5,7 +5,6 @@ - umitools files: - path: output/umitools/test.dedup.bam - md5sum: 53b4edc399db81b87d2343e78af73cf0 - name: umitools dedup test_umitools_dedup_single_end_no_stats command: nextflow run ./tests/modules/umitools/dedup -entry test_umitools_dedup_single_end_no_stats -c ./tests/config/nextflow.config -c ./tests/modules/umitools/dedup/nextflow.config @@ -28,7 +27,6 @@ - path: output/samtools/test.bam.bai md5sum: 095af0ad3921212597ffd7c342ecd5a0 - path: output/umitools/test.dedup.bam - md5sum: 8f7c519e110d6515d858eda6b16727ac - path: output/umitools/test.umi_extract.fastq.gz - path: output/umitools/test.umi_extract.log @@ -53,7 +51,6 @@ - path: output/samtools/test.bam.bai md5sum: f75780d1de7860329b7fb4afeadc4bed - path: output/umitools/test.dedup.bam - md5sum: d75284de88b05569a66667e5b9936be9 - path: output/umitools/test.umi_extract.log - path: output/umitools/test.umi_extract_1.fastq.gz - path: output/umitools/test.umi_extract_2.fastq.gz @@ -79,7 +76,6 @@ - path: output/samtools/test.bam.bai md5sum: f75780d1de7860329b7fb4afeadc4bed - path: output/umitools/test.dedup.bam - md5sum: d75284de88b05569a66667e5b9936be9 - path: output/umitools/test.dedup_edit_distance.tsv md5sum: c247a49b58768e6e2e86a6c08483e612 - path: output/umitools/test.dedup_per_umi.tsv