diff --git a/modules/ensemblvep/main.nf b/modules/ensemblvep/main.nf index a5a9b1ab..d2efe35f 100644 --- a/modules/ensemblvep/main.nf +++ b/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: @@ -27,6 +28,8 @@ process ENSEMBLVEP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? "--fasta $fasta" : "" + """ mkdir $prefix @@ -34,6 +37,7 @@ process ENSEMBLVEP { -i $vcf \\ -o ${prefix}.ann.vcf \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ diff --git a/modules/ensemblvep/meta.yml b/modules/ensemblvep/meta.yml index 418bb970..9891815d 100644 --- a/modules/ensemblvep/meta.yml +++ b/modules/ensemblvep/meta.yml @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 77e135db..db6a1aa1 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : 'broadinstitute/gatk:4.2.3.0' }" @@ -14,8 +14,9 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("${prefix}"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}"), emit: output + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,6 +26,7 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,8 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - export SPARK_USER=spark3 - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ @@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK { cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') END_VERSIONS """ } diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml index bf3e02ba..59be9b6d 100644 --- a/modules/gatk4/markduplicatesspark/meta.yml +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -58,3 +58,4 @@ authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" + - "@SusiJo" diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 30d19957..3c8afada 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -4,11 +4,22 @@ nextflow.enable.dsl = 2 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf' -workflow test_ensemblvep { +workflow test_ensemblvep_fasta { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] ) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) +} + +workflow test_ensemblvep_no_fasta { + input = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] ) } diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index 42384d6e..7e94d5fc 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 +1,13 @@ -- name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config +- name: ensemblvep test_ensemblvep_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + tags: + - ensemblvep + files: + - path: output/ensemblvep/test.ann.vcf + - path: output/ensemblvep/test.summary.html + +- name: ensemblvep test_ensemblvep_no_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index 2f294f59..004fbb1e 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,26 +3,55 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) +} diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 8730f1c4..e0455b4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,4 +2,18 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } + } + } +// override tests/config/nextflow.config +docker.userEmulation = false + diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index b0c0b40d..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -1,25 +1,41 @@ -- name: gatk4 markduplicates test_gatk4_markduplicates_spark - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - - path: output/gatk4/test.metrics + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 - path: output/gatk4/versions.yml -- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: bad71df9c876e72a5bc0a3e0fd755f92 - path: output/gatk4/test.bam - md5sum: 8187febc6108ffef7f907e89b9c091a4 - - path: output/gatk4/test.metrics + md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.cram + md5sum: 2271016de5e4199736598f39d12d7587 + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/test.metrics + contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] - path: output/gatk4/versions.yml