From dd6d838cd7ac2b15dd5e56ecb1dd5b3cea63d78c Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:37:44 +0200 Subject: [PATCH 01/23] override userEmulation --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 8730f1c4..c66f00e6 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -3,3 +3,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } +// necessary to override tests/config/nextflow.config +docker.userEmulation = false + From 1400f2e68361fc04642b7c95a0cdf50e098bf926 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:30 +0200 Subject: [PATCH 02/23] fixed tests - mixed chr21 + chr22 --- .../modules/gatk4/markduplicatesspark/main.nf | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index 2f294f59..cd02c240 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -6,18 +6,32 @@ include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markdupli workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +// chr 21 +workflow test_gatk4_markduplicates_spark_multiple_bams_21 { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) From b34e5ea6bf56d25536324f593e9797a3d9c4ccc7 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:48 +0200 Subject: [PATCH 03/23] rm unnecessary spark_user --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 77e135db..8265e16f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -25,6 +25,7 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,7 +33,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - export SPARK_USER=spark3 gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ From 39e34418c90f822f301ab4b2959ced36a4f70f05 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:34:50 +0200 Subject: [PATCH 04/23] added metrics as optional output --- modules/gatk4/markduplicatesspark/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 8265e16f..eeaf0445 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,15 +14,16 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("${prefix}"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" def input_list = bam.collect{"--input $it"}.join(' ') From e0e8273f7462926b6e58e12183d2ca3484f9e07e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:18 +0200 Subject: [PATCH 05/23] adjusted input to use queryname sorted bams --- .../modules/gatk4/markduplicatesspark/main.nf | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index cd02c240..ac89b1b3 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map @@ -18,8 +19,8 @@ workflow test_gatk4_markduplicates_spark { // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -28,15 +29,15 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } -// chr 21 -workflow test_gatk4_markduplicates_spark_multiple_bams_21 { +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) } From 3117b2973c44677217c2e128890949eba6c94451 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:29 +0200 Subject: [PATCH 06/23] added test for metrics --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index c66f00e6..495a42f3 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,7 +2,10 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + } } -// necessary to override tests/config/nextflow.config +// override tests/config/nextflow.config docker.userEmulation = false From ea526051a44a8b25a943f6d58f789a0745b853d5 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:42 +0200 Subject: [PATCH 07/23] new test.yml --- .../gatk4/markduplicatesspark/test.yml | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index b0c0b40d..34fe0101 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -1,25 +1,31 @@ -- name: gatk4 markduplicates test_gatk4_markduplicates_spark - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 -- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: bad71df9c876e72a5bc0a3e0fd755f92 - path: output/gatk4/test.bam - md5sum: 8187febc6108ffef7f907e89b9c091a4 + md5sum: 898cb0a6616897d8ada90bab53bf0837 + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]' From e083bfafa0b34f69ba1dfa00e38d58d0b565572b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:38:45 +0200 Subject: [PATCH 08/23] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 34fe0101..a0631183 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,8 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]' + contains: + "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]" From d484aa190636690a58941aa90f95a08010fa6f4f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:45:23 +0200 Subject: [PATCH 09/23] adjusted tabs --- tests/modules/gatk4/markduplicatesspark/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a0631183..862b8a16 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -26,7 +26,7 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: - "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]" + "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE + testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 + testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 205576279fa63e7d9ffe3a2ec72eed6eb54b7476 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:50:55 +0200 Subject: [PATCH 10/23] ran prettier again --- tests/modules/gatk4/markduplicatesspark/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 862b8a16..a240c65e 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,8 +25,7 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: - "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 9ef09e6add67b9125c69c989c9d2b1f426ad6244 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:54:39 +0200 Subject: [PATCH 11/23] adjusted newlines --- tests/modules/gatk4/markduplicatesspark/test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a240c65e..ea7f7d4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE - testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 - testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 37b543e3e70c5474bbdcdb834bf9d51c52e743ee Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:29:27 +0200 Subject: [PATCH 12/23] changed to tab --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index ea7f7d4b..5ee751c4 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" From 35e296cd7d52225fbceb4f928c86f1433203835e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:35:34 +0200 Subject: [PATCH 13/23] added brackets --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 5ee751c4..25c8ec38 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' From 941760f75d04b863d0534c210b41505d64972e95 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:36:02 +0200 Subject: [PATCH 14/23] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 25c8ec38..9c235d7a 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' + contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" From 58389e550d9d091be164ac5fef440edb286e52c0 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:41:50 +0200 Subject: [PATCH 15/23] test json array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 9c235d7a..24ff6d84 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" + contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" From 84f584875e542e6b624ac751034af2ddab7860ef Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:50:31 +0200 Subject: [PATCH 16/23] change string array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 24ff6d84..21323ada 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" + contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] From 44daa5d6874f655862524a29a5579aea6f6af52b Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:09 +0200 Subject: [PATCH 17/23] update prefix Co-authored-by: Maxime U. Garcia --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index eeaf0445..5acfd45f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" + def prefix = task.ext.prefix ?: "${meta.id}.bam" def input_list = bam.collect{"--input $it"}.join(' ') From efc1e1bb71592704e5f2631dabdef48c7c3af1d7 Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:18 +0200 Subject: [PATCH 18/23] rm line Co-authored-by: Maxime U. Garcia --- modules/gatk4/markduplicatesspark/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 5acfd45f..703623d5 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -34,7 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ From 230224700edcdf3a8c2d241cafc974e4724e48f4 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 3 Jun 2022 12:45:52 +0200 Subject: [PATCH 19/23] change prefix --- modules/gatk4/markduplicatesspark/main.nf | 2 +- tests/modules/gatk4/markduplicatesspark/main.nf | 14 ++++++++++++++ .../gatk4/markduplicatesspark/nextflow.config | 8 ++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 703623d5..55049fba 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}.bam" + def prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index ac89b1b3..004fbb1e 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf' include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { @@ -29,6 +30,19 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 495a42f3..565016b7 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,9 +2,17 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' } + } // override tests/config/nextflow.config docker.userEmulation = false From e2bb56608c6934f5228e38a157eae3f8fc58742f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 3 Jun 2022 13:50:45 +0200 Subject: [PATCH 20/23] change prefix output --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 55049fba..79e12814 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,7 +14,7 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("${prefix}"), emit: output tuple val(meta), path("*.metrics"), emit: metrics, optional: true path "versions.yml" , emit: versions From dbca9e8b361187bf4c297045927cbcbf5034c312 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 13:58:57 +0200 Subject: [PATCH 21/23] adjusted prefix + test + authors --- modules/gatk4/markduplicatesspark/main.nf | 2 +- modules/gatk4/markduplicatesspark/meta.yml | 1 + tests/modules/gatk4/markduplicatesspark/nextflow.config | 2 +- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 79e12814..945f105d 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml index bf3e02ba..59be9b6d 100644 --- a/modules/gatk4/markduplicatesspark/meta.yml +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -58,3 +58,4 @@ authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" + - "@SusiJo" diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 565016b7..e0455b4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -9,8 +9,8 @@ process { ext.prefix = { "${meta.id}.cram" } } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { - ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } } } diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 21323ada..31d8d34d 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -16,6 +16,15 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.cram + md5sum: 2271016de5e4199736598f39d12d7587 + - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: From 745e47cd33bad1303257b669d22c913abd4bf94e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 14:38:43 +0200 Subject: [PATCH 22/23] add versions.yml --- tests/modules/gatk4/markduplicatesspark/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 31d8d34d..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.bam md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -24,6 +26,7 @@ files: - path: output/gatk4/test.cram md5sum: 2271016de5e4199736598f39d12d7587 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -35,3 +38,4 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] + - path: output/gatk4/versions.yml From cda434611f851bbc4fe3d876e6b702cf721a0175 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 9 Jun 2022 10:24:39 +0200 Subject: [PATCH 23/23] fix conda with openjdk 1.8 --- modules/gatk4/markduplicatesspark/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 945f105d..db6a1aa1 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : 'broadinstitute/gatk:4.2.3.0' }" @@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK { cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') END_VERSIONS """ }