From 80366924fdd0b0183cebe2258f25807db5b93c10 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:08:13 +0200 Subject: [PATCH 01/26] added gatk4/composestrtablefile --- modules/gatk4/composestrtablefile/main.nf | 51 +++++++++++++++++++ modules/gatk4/composestrtablefile/meta.yml | 43 ++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../modules/gatk4/composestrtablefile/main.nf | 16 ++++++ .../gatk4/composestrtablefile/nextflow.config | 5 ++ .../gatk4/composestrtablefile/test.yml | 8 +++ 6 files changed, 127 insertions(+) create mode 100644 modules/gatk4/composestrtablefile/main.nf create mode 100644 modules/gatk4/composestrtablefile/meta.yml create mode 100644 tests/modules/gatk4/composestrtablefile/main.nf create mode 100644 tests/modules/gatk4/composestrtablefile/nextflow.config create mode 100644 tests/modules/gatk4/composestrtablefile/test.yml diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..a1114f2a --- /dev/null +++ b/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,51 @@ +process GATK4_COMPOSESTRTABLEFILE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple path(fasta), path(fasta_fai), path(dict) + + output: + path "*.zip" , emit: str_table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6 + if (!task.memory) { + log.info '[GATK ComposeSTRTableFile] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ComposeSTRTableFile \\ + --reference $fasta \\ + --output ${fasta.baseName}.zip \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch test.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/composestrtablefile/meta.yml b/modules/gatk4/composestrtablefile/meta.yml new file mode 100644 index 00000000..eb825ef4 --- /dev/null +++ b/modules/gatk4/composestrtablefile/meta.yml @@ -0,0 +1,43 @@ +name: "gatk4_composestrtablefile" +description: This tool looks for low-complexity STR sequences along the reference that are later used to estimate the Dragstr model during single sample auto calibration CalibrateDragstrModel. +keywords: + - gatk4 + - composestrtablefile +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/4405451249819-ComposeSTRTableFile + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: index of the FASTA reference file + pattern: "*.fai" + - dict: + type: file + description: Sequence dictionary of the FASTA reference file + pattern: "*.dict" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - str_table: + type: file + description: A zipped folder containing the STR table files + pattern: "*.zip" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..bf4a6137 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -751,6 +751,10 @@ gatk4/combinegvcfs: - modules/gatk4/combinegvcfs/** - tests/modules/gatk4/combinegvcfs/** +gatk4/composestrtablefile: + - modules/gatk4/composestrtablefile/** + - tests/modules/gatk4/composestrtablefile/** + gatk4/createsequencedictionary: - modules/gatk4/createsequencedictionary/** - tests/modules/gatk4/createsequencedictionary/** diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..9fc2e5f5 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestrtablefile/main.nf' + +workflow test_gatk4_composestrtablefile { + + input = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), 
+ file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + ] + + GATK4_COMPOSESTRTABLEFILE ( input ) +} diff --git a/tests/modules/gatk4/composestrtablefile/nextflow.config b/tests/modules/gatk4/composestrtablefile/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml new file mode 100644 index 00000000..3bafb348 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -0,0 +1,8 @@ +- name: gatk4 composestrtablefile test_gatk4_composestrtablefile + command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config + tags: + - gatk4/composestrtablefile + - gatk4 + files: + - path: output/gatk4/genome.zip + contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" From ec41ea6082fcfaadcb8a6a4f02979144de86cd91 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:12:05 +0200 Subject: [PATCH 02/26] fix test.yml --- tests/modules/gatk4/composestrtablefile/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 3bafb348..514ae79c 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,8 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c 
./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4/composestrtablefile - gatk4 + - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip - contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" + - path: output/gatk4/genome.zip \ No newline at end of file From 1166fb51505b39983af8ee16ecb382222eec22bb Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:13:38 +0200 Subject: [PATCH 03/26] prettier --- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 514ae79c..597ae144 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -4,4 +4,4 @@ - gatk4 - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip \ No newline at end of file + - path: output/gatk4/genome.zip From 5b74a179c3323d683065e820b2fb5a80fdaedcb7 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Wed, 1 Jun 2022 09:14:08 +0200 Subject: [PATCH 04/26] Update modules/gatk4/composestrtablefile/main.nf Co-authored-by: FriederikeHanssen --- modules/gatk4/composestrtablefile/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index a1114f2a..dae61166 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,7 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple path(fasta), path(fasta_fai), path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: str_table From d4f3df4677fb0aff66db16c896e59fc4ef93a01b Mon Sep 17 00:00:00 2001 
From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:17:05 +0200 Subject: [PATCH 05/26] Updated the test --- tests/modules/gatk4/composestrtablefile/main.nf | 14 ++++++++++---- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index 9fc2e5f5..da8967cc 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,11 +6,17 @@ include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr workflow test_gatk4_composestrtablefile { - input = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), + fasta = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + fasta_fai = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + + dict = [ file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] - GATK4_COMPOSESTRTABLEFILE ( input ) + GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 597ae144..0a87372a 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,7 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4 - gatk4/composestrtablefile + - gatk4 files: - path: output/gatk4/genome.zip From 8470f3cb7e790701401523c58a72e183ac47ecac Mon Sep 17 00:00:00 2001 From: Nicolas 
Vannieuwkerke Date: Wed, 1 Jun 2022 09:20:20 +0200 Subject: [PATCH 06/26] linting --- tests/modules/gatk4/composestrtablefile/main.nf | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index da8967cc..8fccadb9 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,17 +6,11 @@ include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr workflow test_gatk4_composestrtablefile { - fasta = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fasta_fai = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - ] + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - dict = [ - file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - ] + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } From 114a54c8d5a8e898a126c2804e3e221286eb2682 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:22:09 +0200 Subject: [PATCH 07/26] spaces fix --- modules/gatk4/composestrtablefile/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index dae61166..8f2f00f2 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,9 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - path(fasta) - path(fasta_fai) - path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: 
str_table From f6953b51478d24a4caf84b40d92bba3350a38507 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 1 Jun 2022 17:35:02 +0200 Subject: [PATCH 08/26] adjust cnvkit/batch for germline mode --- modules/cnvkit/batch/main.nf | 38 +++++++++++------ tests/config/test_data.config | 6 +-- tests/modules/cnvkit/batch/main.nf | 22 ++++++++-- tests/modules/cnvkit/batch/test.yml | 63 ++++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index c1f9ef87..b467c6f2 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -28,34 +28,48 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow - // input pair is assumed to have same extension if both exist - def is_cram = tumor.Extension == "cram" ? true : false - def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" - // do not run samtools on normal samples in tumor_only mode - def normal_exists = normal ? true: false // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] if (normal_exists){ def normal_prefix = normal.BaseName - normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" - normal_args = normal_prefix ? "--normal $normal_out" : "" + normal_out = normal_cram ? 
"${normal_prefix}" + ".bam" : "${normal}" fasta_args = fasta ? "--fasta $fasta" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty + if(!tumor_exists){ + tumor_out = normal.BaseName + ".bam" + normal_args = "--normal " + } + // somatic mode + else { + normal_args = normal_prefix ? "--normal $normal_out" : "" + } } def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" """ - if $is_cram; then + if $tumor_cram; then samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - if $normal_exists; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi fi + if $normal_cram; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + cnvkit.py \\ batch \\ diff --git a/tests/config/test_data.config b/tests/config/test_data.config index cf7d45f6..bef6f0c9 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -23,6 +23,8 @@ params { test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12" baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed" + reference_cnn = "${test_data_dir}/genomics/sarscov2/genome/cnn/reference.cnn" + kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2" kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz" @@ -145,6 +147,7 @@ params { genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" 
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" @@ -414,9 +417,6 @@ params { 'txt' { hello = "${test_data_dir}/generic/txt/hello.txt" } - 'cnn' { - reference = "${test_data_dir}/generic/cnn/reference.cnn" - } 'cooler'{ test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index 988ed3ad..c93e3a00 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -5,6 +5,7 @@ nextflow.enable.dsl = 2 include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' +include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' workflow test_cnvkit_hybrid { @@ -47,10 +48,10 @@ workflow test_cnvkit_tumoronly { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), [] ] - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, [], [], reference ) } @@ -59,11 +60,24 @@ workflow test_cnvkit_tumoronly_cram { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, fasta, [], reference ) } + +workflow test_cnvkit_germline_cram { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, targets, []) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 00d6a767..928a08de 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -92,9 +92,22 @@ - cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 
d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 - name: cnvkit batch test_cnvkit_tumoronly_cram command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config @@ -102,6 +115,44 @@ - cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 + +- name: cnvkit batch test_cnvkit_germline_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 
86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f From a21cfb666eed7517060f1d2d61a21c340753b0ec Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 10:38:05 +0200 Subject: [PATCH 09/26] changed container + add cram2bam conversion --- modules/deeptools/bamcoverage/main.nf | 58 +++++++++++++++----- modules/deeptools/bamcoverage/meta.yml | 9 +++ tests/modules/deeptools/bamcoverage/main.nf | 18 +++++- tests/modules/deeptools/bamcoverage/test.yml | 21 ++++--- 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/modules/deeptools/bamcoverage/main.nf b/modules/deeptools/bamcoverage/main.nf index 926bf0ad..04073ed9 100644 --- a/modules/deeptools/bamcoverage/main.nf +++ b/modules/deeptools/bamcoverage/main.nf @@ -2,13 +2,15 @@ process DEEPTOOLS_BAMCOVERAGE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::deeptools=3.5.1" : null) + conda (params.enable_conda ? "bioconda::deeptools=3.5.1 bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0': - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0': + 'quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0' }" input: tuple val(meta), path(input), path(input_index) + path(fasta) + path(fasta_fai) output: tuple val(meta), path("*.bigWig") , emit: bigwig, optional: true @@ -22,16 +24,44 @@ process DEEPTOOLS_BAMCOVERAGE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}.bigWig" - """ - bamCoverage \\ - --bam $input \\ - $args \\ - --numberOfProcessors ${task.cpus} \\ - --outFileName ${prefix} + // cram_input is currently not working with deeptools + // therefore it's required to convert cram to bam first + def is_cram = input.Extension == "cram" ? true : false + def input_out = is_cram ? input.BaseName + ".bam" : "${input}" + def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" + + if (is_cram){ + """ + samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out + samtools index -b $input_out -@ $task.cpus + + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + + } + else { + """ + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") - END_VERSIONS - """ } diff --git a/modules/deeptools/bamcoverage/meta.yml b/modules/deeptools/bamcoverage/meta.yml index fb92168f..c6566910 100644 --- a/modules/deeptools/bamcoverage/meta.yml +++ b/modules/deeptools/bamcoverage/meta.yml @@ -25,6 +25,14 @@ input: type: file description: BAM/CRAM index file pattern: "*.{bai,crai}" + - fasta: + type: file + description: Reference file the CRAM file was created with (required with CRAM input) + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: Index of the reference file (optional, but recommended) + pattern: "*.{fai}" output: - meta: @@ -47,3 +55,4 @@ output: authors: - "@FriederikeHanssen" + - "@SusiJo" diff --git a/tests/modules/deeptools/bamcoverage/main.nf b/tests/modules/deeptools/bamcoverage/main.nf index fb5c1c2d..97a79508 100644 --- a/tests/modules/deeptools/bamcoverage/main.nf +++ b/tests/modules/deeptools/bamcoverage/main.nf @@ -12,7 +12,7 @@ workflow test_deeptools_bamcoverage_bam { 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, [], [] ) } workflow test_deeptools_bamcoverage_cram { @@ -22,6 +22,20 @@ workflow test_deeptools_bamcoverage_cram { file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, fasta, fasta_fai) +} + +workflow test_deeptools_bamcoverage_cram_no_fasta_fai { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + DEEPTOOLS_BAMCOVERAGE ( input, fasta, []) } diff --git a/tests/modules/deeptools/bamcoverage/test.yml b/tests/modules/deeptools/bamcoverage/test.yml index 736c2e61..63ef6442 100644 --- a/tests/modules/deeptools/bamcoverage/test.yml +++ b/tests/modules/deeptools/bamcoverage/test.yml @@ -1,21 +1,26 @@ - name: deeptools bamcoverage test_deeptools_bamcoverage_bam - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools files: - path: 
output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 68c94e73b7a8c0935578bad61fea54c1 - name: deeptools bamcoverage test_deeptools_bamcoverage_cram - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools + files: + - path: output/deeptools/test.bigWig + md5sum: 95fe9383a9e6c02aea6b785cf074274f + +- name: deeptools bamcoverage test_deeptools_bamcoverage_cram_no_fasta_fai + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram_no_fasta_fai -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config + tags: + - deeptools/bamcoverage + - deeptools files: - path: output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 665bbd2979c49bf3974a24bd44a88e94 From 51be617b1ca9bff973655eb899d591ed6ab253b5 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Thu, 2 Jun 2022 11:09:55 +0200 Subject: [PATCH 10/26] fix untar output dir --- modules/untar/main.nf | 8 +++++++- tests/config/test_data.config | 2 ++ tests/modules/untar/main.nf | 10 ++++++++++ tests/modules/untar/test.yml | 12 ++++++++++-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/modules/untar/main.nf b/modules/untar/main.nf index 058d1764..29ab10a5 100644 --- a/modules/untar/main.nf +++ b/modules/untar/main.nf @@ -21,12 +21,18 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ 
$archive \\ - $args2 \\ + $args2 + + mv output ${untar} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..4af77d92 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -323,6 +323,8 @@ params { test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" + + test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" } 'pacbio' { primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" diff --git a/tests/modules/untar/main.nf b/tests/modules/untar/main.nf index 9d6d4c6c..3b7daedd 100644 --- a/tests/modules/untar/main.nf +++ b/tests/modules/untar/main.nf @@ -12,3 +12,13 @@ workflow test_untar { UNTAR ( input ) } + + +workflow test_untar_different_output_path { + input = [ + [], + file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) + ] + + UNTAR ( input ) +} diff --git a/tests/modules/untar/test.yml b/tests/modules/untar/test.yml index 6d0d1d12..6599f105 100644 --- a/tests/modules/untar/test.yml +++ b/tests/modules/untar/test.yml @@ -1,5 +1,5 @@ -- name: untar - command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config +- name: untar test_untar + command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config tags: - untar files: @@ -9,3 +9,11 @@ md5sum: a033d00cf6759407010b21700938f543 - path: output/untar/kraken2/taxo.k2d md5sum: 094d5891cdccf2f1468088855c214b2c + +- name: untar test_untar_different_output_path + command: nextflow run ./tests/modules/untar -entry test_untar_different_output_path -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config + tags: + - untar + files: + - path: 
output/untar/flowcell/RunInfo.xml + md5sum: 03038959f4dd181c86bc97ae71fe270a From 2f3ddc90fbeda9fcf54f25b84465f5ac149d2b75 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 12:57:50 +0200 Subject: [PATCH 11/26] added fasta_fai as input + changed if statements --- modules/cnvkit/batch/main.nf | 152 +++++++++++++++++++++++----- modules/cnvkit/batch/meta.yml | 4 + tests/modules/cnvkit/batch/main.nf | 44 +++++--- tests/modules/cnvkit/batch/test.yml | 49 ++++++--- 4 files changed, 200 insertions(+), 49 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index b467c6f2..090f7860 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -10,6 +10,7 @@ process CNVKIT_BATCH { input: tuple val(meta), path(tumor), path(normal) path fasta + path fasta_fai path targets path reference @@ -34,12 +35,15 @@ process CNVKIT_BATCH { // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" if (normal_exists){ def normal_prefix = normal.BaseName @@ -48,9 +52,9 @@ process CNVKIT_BATCH { // germline mode // normal samples must be input without a flag - // requires flag --normal to be empty + // requires flag --normal to be empty [] if(!tumor_exists){ - tumor_out = normal.BaseName + ".bam" + tumor_out = "${normal_prefix}" + ".bam" normal_args = "--normal " } // somatic mode @@ -62,28 +66,130 @@ process CNVKIT_BATCH { def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" - """ - if $tumor_cram; then - samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - fi - if $normal_cram; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi + // somatic_mode cram_input + if (tumor_cram && normal_cram){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args - cnvkit.py \\ - batch \\ - $tumor_out \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // somatic_mode bam_input + else if (tumor_bam && normal_bam){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only_mode cram_input + else if(tumor_cram && !normal_exists){ + """ + samtools view -T $fasta 
$fai_reference $tumor -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only bam_input + else if(tumor_bam && !normal_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode cram_input + // normal_args must be --normal [] + else if (normal_cram && !tumor_exists){ + """ + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode bam_input + else if (normal_bam && !tumor_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ } diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 2cd675c7..89762192 100644 --- 
a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -29,6 +29,10 @@ input: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) - targetfile: type: file description: | diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index c93e3a00..daf87a37 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' -workflow test_cnvkit_hybrid { +workflow test_cnvkit_hybrid_somatic { input = [ [ id:'test' ], // meta map @@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) - CNVKIT_HYBRID ( input, fasta, targets, [] ) + CNVKIT_HYBRID ( input, fasta, [], targets, [] ) } -workflow test_cnvkit_wgs { +workflow test_cnvkit_wgs_somatic { input = [ [ id:'test'], // meta map @@ -29,22 +29,24 @@ workflow test_cnvkit_wgs { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, [], [], [] ) } -workflow test_cnvkit_cram { +workflow test_cnvkit_cram_wgs_somatic { input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, fasta_fai, [], [] ) } -workflow test_cnvkit_tumoronly { + +workflow test_cnvkit_tumoronly_hybrid_bam { input = [ [ id:'test'], // meta map @@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly { ] reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, [], [], reference ) + CNVKIT_TUMORONLY ( input, [], [], [], reference ) } -workflow test_cnvkit_tumoronly_cram { +workflow test_cnvkit_tumoronly_hybrid_cram { input = [ [ id:'test'], // meta map @@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram { fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, fasta, [], reference ) + CNVKIT_TUMORONLY ( input, fasta, [], [], reference ) } -workflow test_cnvkit_germline_cram { +workflow test_cnvkit_germline_hybrid_cram { input = [ [ id:'test'], // meta map @@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram { file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) - CNVKIT_GERMLINE ( 
input, fasta, targets, []) + CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, []) +} + +workflow test_cnvkit_germline_hybrid_bam { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, [], targets, []) } diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 928a08de..006e142f 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,5 +1,5 @@ -- name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_hybrid_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -26,8 +26,8 @@ - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 -- name: cnvkit batch test_cnvkit_wgs - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -56,8 +56,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_cram - command: nextflow run 
./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_cram_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -86,8 +86,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_tumoronly - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -109,8 +109,8 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_tumoronly_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -132,8 +132,33 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_germline_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch 
test_cnvkit_germline_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f + +- name: cnvkit batch test_cnvkit_germline_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch From 2a45d4286895d77d47b3a3dd9bc845b4d13276df Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:17:06 +0200 Subject: [PATCH 12/26] added the calibratedragstrmodel module --- modules/gatk4/calibratedragstrmodel/main.nf | 48 +++++++++++++ modules/gatk4/calibratedragstrmodel/meta.yml | 70 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 1 + .../gatk4/calibratedragstrmodel/main.nf | 44 ++++++++++++ .../calibratedragstrmodel/nextflow.config | 5 ++ 
.../gatk4/calibratedragstrmodel/test.yml | 17 +++++ 7 files changed, 189 insertions(+) create mode 100644 modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 modules/gatk4/calibratedragstrmodel/meta.yml create mode 100644 tests/modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 tests/modules/gatk4/calibratedragstrmodel/nextflow.config create mode 100644 tests/modules/gatk4/calibratedragstrmodel/test.yml diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..6682753d --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,48 @@ +process GATK4_CALIBRATEDRAGSTRMODEL{ + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bam_index) + path fasta + path fasta_fai + path dict + path strtablefile + + output: + tuple val(meta), path("*.txt") , emit: dragstr_model + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK CalibrateDragstrModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" CalibrateDragstrModel \\ + --input $bam \\ + --output ${prefix}.txt \\ + --reference $fasta \\ + --str-table-path $strtablefile \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml new file mode 100644 index 00000000..f72a4f60 --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -0,0 +1,70 @@ +name: "gatk4_calibratedragstrmodel" +description: estimates the parameters for the DRAGstr model +keywords: + - gatk4 + - bam + - cram + - sam + - calibratedragstrmodel +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360057441571-CalibrateDragstrModel-BETA- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bam_index: + type: file + description: index of the BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: The sequence dictionary of the reference FASTA file + pattern: "*.dict" + - strtablefile: + type: file + description: The StrTableFile zip folder of the reference FASTA file + pattern: "*.zip" + +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dragstr_model: + type: file + description: The DragSTR model + pattern: "*.txt" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index bf4a6137..d4bfac5b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -743,6 +743,10 @@ gatk4/calculatecontamination: - modules/gatk4/calculatecontamination/** - tests/modules/gatk4/calculatecontamination/** +gatk4/calibratedragstrmodel: + - modules/gatk4/calibratedragstrmodel/** + - tests/modules/gatk4/calibratedragstrmodel/** + gatk4/cnnscorevariants: - modules/gatk4/cnnscorevariants/** - tests/modules/gatk4/cnnscorevariants/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..b1326544 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -121,6 +121,7 @@ params { genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta" genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta" genome_fasta_fai = 
"${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_strtablefile = "${test_data_dir}/genomics/homo_sapiens/genome/genome_strtablefile.zip" genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict" genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..4d4e9896 --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,44 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../../modules/gatk4/calibratedragstrmodel/main.nf' + +workflow test_gatk4_calibratedragstrmodel_bam { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_cram { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + + fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + diff --git a/tests/modules/gatk4/calibratedragstrmodel/nextflow.config b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml new file mode 100644 index 00000000..bc46dead --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -0,0 +1,17 @@ +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 0a1a1583b157fa2251dd931ed165da4f + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 1aa7ab38023f724877b3323c5e6b9a4e From 
d3874bbae9035e6be95be42a712acfff4d28d00e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:22:55 +0200 Subject: [PATCH 13/26] linting --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index f72a4f60..e6d2b0b1 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -1,4 +1,4 @@ -name: "gatk4_calibratedragstrmodel" +name: gatk4_calibratedragstrmodel description: estimates the parameters for the DRAGstr model keywords: - gatk4 From 3a49b82863d6dc71fcfcb36a01b04cb58dcb6f12 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 13:39:44 +0200 Subject: [PATCH 14/26] Update modules/gatk4/calibratedragstrmodel/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 6682753d..00a11de0 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,4 +1,4 @@ -process GATK4_CALIBRATEDRAGSTRMODEL{ +process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" label 'process_low' From e53d091a6de1ae9fd681351c085d8abe076ba1ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 14:08:09 +0200 Subject: [PATCH 15/26] added dragstr model to haplotypecaller --- modules/gatk4/haplotypecaller/main.nf | 4 +++- modules/gatk4/haplotypecaller/meta.yml | 4 ++++ tests/config/test_data.config | 2 ++ tests/modules/gatk4/haplotypecaller/main.nf | 22 +++++++++++++++++++- tests/modules/gatk4/haplotypecaller/test.yml | 18 ++++++++++------ 5 files changed, 42 insertions(+), 8 deletions(-) diff --git 
a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 6dd3f69e..19cd57bb 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -8,7 +8,7 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) path fasta path fai path dict @@ -28,6 +28,7 @@ process GATK4_HAPLOTYPECALLER { def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" def avail_mem = 3 if (!task.memory) { @@ -42,6 +43,7 @@ process GATK4_HAPLOTYPECALLER { --reference $fasta \\ $dbsnp_command \\ $interval_command \\ + $dragstr_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 81851a96..48193d91 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -32,6 +32,10 @@ input: - intervals: type: file description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file (optional) + pattern: "*.txt" - fasta: type: file description: The reference fasta file diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 363903f0..fa7017b1 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -266,6 +266,8 @@ params { test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + test_paired_end_sorted_dragstrmodel = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index 7b60baab..3ad3dfb5 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -8,6 +8,7 @@ workflow test_gatk4_haplotypecaller { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) @@ -21,6 +22,7 @@ workflow test_gatk4_haplotypecaller_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -34,7 +36,8 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -45,3 +48,20 @@ workflow 
test_gatk4_haplotypecaller_intervals_dbsnp { GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) } + +workflow test_gatk4_haplotypecaller_dragstr_model { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_dragstrmodel'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = [] + sites_tbi = [] + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 3d416a0d..b566ee66 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,29 +1,35 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c 
./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml From a6c4276ad74eb965fd9d8eb831f0786b85abc9c8 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:19:57 +0200 Subject: [PATCH 16/26] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index b566ee66..ca3110d7 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,5 +1,5 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 2ce1cf3329eecc636ddfcbf27f86b398b45ac0ad Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:04 +0200 Subject: [PATCH 17/26] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index ca3110d7..094beaef 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -8,7 +8,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e419d306dfeb8bab0e1d3ac07151585c0fcbee01 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:10 +0200 Subject: [PATCH 18/26] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 094beaef..17c4f6bf 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -17,7 +17,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e107d61069612fca5aaabcd4f056d06c732d0c18 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:15 +0200 Subject: [PATCH 19/26] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 17c4f6bf..96270644 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -26,7 +26,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 046fa3958223ce4542578ef26502d39f4c5d71ef Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:09:09 +0200 Subject: [PATCH 20/26] update VEP version + cache --- modules/ensemblvep/Dockerfile | 4 ++-- modules/ensemblvep/build.sh | 12 ++++++------ modules/ensemblvep/environment.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/ensemblvep/Dockerfile b/modules/ensemblvep/Dockerfile index b4a1c664..4ada7c6b 100644 --- a/modules/ensemblvep/Dockerfile +++ b/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 650c8704..402f2434 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 
@@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/ensemblvep/environment.yml b/modules/ensemblvep/environment.yml index c0731c26..5df85b80 100644 --- a/modules/ensemblvep/environment.yml +++ b/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 From bc0f52bcd3a64f27d8fb2da3a6424b427e15e53c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:11 +0200 Subject: [PATCH 21/26] fix cache version --- modules/ensemblvep/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 402f2434..e21a7c0d 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "105" "105.0" -build_push "GRCh38" "homo_sapiens" "105" "105.0" -build_push "GRCm38" "mus_musculus" "102" "105.0" -build_push "GRCm39" "mus_musculus" "105" "105.0" -build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" 
-build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +# build_push "GRCh37" "homo_sapiens" "105" "105.0" +# build_push "GRCh38" "homo_sapiens" "105" "105.0" +# build_push "GRCm38" "mus_musculus" "102" "105.0" +# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b623ff5d2798b8c955baeb3890f0baee6077db66 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:45 +0200 Subject: [PATCH 22/26] fix comments --- modules/ensemblvep/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index e21a7c0d..6f340c0f 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -# build_push "GRCh37" "homo_sapiens" "105" "105.0" -# build_push "GRCh38" "homo_sapiens" "105" "105.0" -# build_push "GRCm38" "mus_musculus" "102" "105.0" -# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" -# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b4b1f3b81d897b97da0655cf48107b70aa4b2f73 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:20:12 +0200 Subject: [PATCH 23/26] add intervals to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 4 ++- modules/gatk4/calibratedragstrmodel/meta.yml | 4 +++ .../gatk4/calibratedragstrmodel/main.nf | 26 +++++++++++++++++-- .../gatk4/calibratedragstrmodel/test.yml | 13 ++++++++-- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git 
a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 00a11de0..08ba7008 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -8,7 +8,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bam_index) + tuple val(meta), path(bam), path(bam_index), path(intervals) path fasta path fasta_fai path dict @@ -24,6 +24,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3 if (!task.memory) { @@ -37,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + $intervals_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index e6d2b0b1..c3c40d81 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -33,6 +33,10 @@ input: type: file description: index of the BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: BED file or interval list containing regions + pattern: "*.{bed,interval_list}" - fasta: type: file description: The reference FASTA file diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf index 4d4e9896..136a9742 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/main.nf +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -9,7 +9,8 @@ workflow test_gatk4_calibratedragstrmodel_bam { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -28,7 +29,28 @@ workflow test_gatk4_calibratedragstrmodel_cram { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_beds { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index bc46dead..53c428a9 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ 
b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,8 +1,8 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 0a1a1583b157fa2251dd931ed165da4f @@ -10,8 +10,17 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4/calibratedragstrmodel + - gatk4 + files: + - path: output/gatk4/test.txt + md5sum: def8baccad7bd59006f08fcb0a6721bf From f3c0107fa2ecdc510b4f569dc3e062c7e028ac7a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:24:03 +0200 Subject: [PATCH 24/26] added optional to intervals --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index c3c40d81..e71dac5e 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -35,7 +35,7 @@ input: pattern: "*.{bai,crai,sai}" - intervals: type: file - description: BED file or interval list containing 
regions + description: BED file or interval list containing regions (optional) pattern: "*.{bed,interval_list}" - fasta: type: file From 43ef3841d1249aa20793b783bb961f568af2b8a4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:25:12 +0200 Subject: [PATCH 25/26] remove duplicate path --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 866ff8a5..7454430c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,7 +9,7 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) From b1edcc6e94124fc7f2473655268f6d72b12d7209 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:26:09 +0200 Subject: [PATCH 26/26] Update modules/gatk/realignertargetcreator/main.nf --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 7454430c..96b8806c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -29,7 +29,7 @@ process GATK_REALIGNERTARGETCREATOR { def avail_mem = 3 if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga }