Merge branch 'master' into new-module-gatk4/reblockgvcf

This commit is contained in:
nvnieuwk 2022-06-09 14:57:20 +02:00 committed by GitHub
commit bf73b6d376
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 119 additions and 30 deletions

View file

@ -13,6 +13,7 @@ process ENSEMBLVEP {
val species
val cache_version
path cache
path fasta
path extra_files
output:
@ -27,6 +28,8 @@ process ENSEMBLVEP {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
def reference = fasta ? "--fasta $fasta" : ""
"""
mkdir $prefix
@ -34,6 +37,7 @@ process ENSEMBLVEP {
-i $vcf \\
-o ${prefix}.ann.vcf \\
$args \\
$reference \\
--assembly $genome \\
--species $species \\
--cache \\

View file

@ -36,6 +36,11 @@ input:
type: file
description: |
path to VEP cache (optional)
- fasta:
type: file
description: |
reference FASTA file (optional)
pattern: "*.{fasta,fa}"
- extra_files:
type: tuple
description: |

View file

@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
'broadinstitute/gatk:4.2.3.0' }"
@ -15,6 +15,7 @@ process GATK4_MARKDUPLICATES_SPARK {
output:
tuple val(meta), path("${prefix}"), emit: output
tuple val(meta), path("*.metrics"), emit: metrics, optional: true
path "versions.yml" , emit: versions
when:
@ -25,6 +26,7 @@ process GATK4_MARKDUPLICATES_SPARK {
prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--input $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,8 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK {
avail_mem = task.memory.giga
}
"""
export SPARK_USER=spark3
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
$input_list \\
--output $prefix \\
@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
END_VERSIONS
"""
}

View file

@ -58,3 +58,4 @@ authors:
- "@ajodeh-juma"
- "@FriederikeHanssen"
- "@maxulysse"
- "@SusiJo"

View file

@ -4,11 +4,22 @@ nextflow.enable.dsl = 2
include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf'
workflow test_ensemblvep {
workflow test_ensemblvep_fasta {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] )
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] )
}
// Test entry point: runs ENSEMBLVEP on the sarscov2 test VCF without a reference FASTA.
workflow test_ensemblvep_no_fasta {
// Input pairs a minimal meta map with the VCF to annotate.
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]
// The three trailing empty lists appear to fill the module's last three inputs
// (cache, fasta, extra_files per the input block shown for ENSEMBLVEP) -- confirm against the full module.
// With a falsy fasta the module's `reference` string is empty, so no `--fasta` option is emitted.
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] )
}

View file

@ -1,5 +1,13 @@
- name: ensemblvep test_ensemblvep
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
- name: ensemblvep test_ensemblvep_fasta
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
tags:
- ensemblvep
files:
- path: output/ensemblvep/test.ann.vcf
- path: output/ensemblvep/test.summary.html
- name: ensemblvep test_ensemblvep_no_fasta
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
tags:
- ensemblvep
files:

View file

@ -3,26 +3,55 @@
nextflow.enable.dsl = 2
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
workflow test_gatk4_markduplicates_spark {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
}
// chr 22
workflow test_gatk4_markduplicates_spark_multiple_bams {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
}
// chr 22
// Test entry point: marks duplicates across two BAMs using the *_CRAM alias of the module.
// The test config gives that alias ext.prefix = "${meta.id}.cram", and the module passes the
// prefix to `--output`, so the expected result file is test.cram.
workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
// Meta map plus a list of two name-sorted paired-end BAMs from the homo_sapiens test data.
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ]
// Reference genome with its index (.fai) and sequence dictionary (.dict).
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
}
// chr 22
// Test entry point: marks duplicates across two BAMs using the *_METRICS alias of the module.
// The test config gives that alias ext.args = '--metrics-file test.metrics' and a ".bam" prefix;
// the module declares an optional "*.metrics" output that this test is meant to exercise.
workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
// Meta map plus a list of two name-sorted paired-end BAMs from the homo_sapiens test data.
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ]
// Reference genome with its index (.fai) and sequence dictionary (.dict).
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
}

View file

@ -2,4 +2,18 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
// Per-alias settings for the MarkDuplicatesSpark tests. The module reads task.ext.prefix and
// uses it both as the `--output` value and as the declared output path, so the prefix here is
// the complete output file name, extension included.
withName: GATK4_MARKDUPLICATES_SPARK {
ext.prefix = { "${meta.id}.bam" }
}
// CRAM variant: only the output file name (extension) differs.
withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
ext.prefix = { "${meta.id}.cram" }
}
// Metrics variant: additionally requests a metrics file.
// NOTE(review): ext.args is presumably forwarded to the gatk command line via task.ext.args;
// the script lines that consume it are not visible here -- confirm.
withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
ext.args = '--metrics-file test.metrics'
ext.prefix = { "${meta.id}.bam" }
}
}
// override tests/config/nextflow.config
docker.userEmulation = false

View file

@ -1,25 +1,41 @@
- name: gatk4 markduplicates test_gatk4_markduplicates_spark
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.bai
md5sum: e9c125e82553209933883b4fe2b8d7c2
- path: output/gatk4/test.bam
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
- path: output/gatk4/test.metrics
md5sum: dc1a09ac6371aab7c50d1a554baa06d3
- path: output/gatk4/versions.yml
- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.bai
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
- path: output/gatk4/test.bam
md5sum: 8187febc6108ffef7f907e89b9c091a4
- path: output/gatk4/test.metrics
md5sum: 898cb0a6616897d8ada90bab53bf0837
- path: output/gatk4/versions.yml
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.cram
md5sum: 2271016de5e4199736598f39d12d7587
- path: output/gatk4/versions.yml
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.bam
md5sum: 898cb0a6616897d8ada90bab53bf0837
- path: output/gatk4/test.metrics
contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
- path: output/gatk4/versions.yml