nf-core_modules/modules/gatk4/markduplicatesspark/main.nf

50 lines
1.4 KiB
Text
Raw Normal View History

/*
 * Runs `gatk MarkDuplicatesSpark` on the BAM file(s) supplied for one sample.
 *
 * Inputs:
 *   meta, bam  - sample metadata map (meta.id is used for the tag and the
 *                default output name) and one or more BAM paths; every path
 *                is passed to the tool as its own `--input` argument.
 *   fasta      - reference passed via `--reference`.
 *   fasta_fai  - staged alongside the reference; not referenced on the
 *                command line (presumably the FASTA index - confirm with caller).
 *   dict       - staged alongside the reference; not referenced on the
 *                command line (presumably the sequence dictionary - confirm).
 *
 * Outputs:
 *   output     - the file written to `--output`, named exactly `prefix`.
 *   metrics    - any `*.metrics` file in the work dir (optional; nothing in
 *                this command creates one unless `task.ext.args` asks for it).
 *   versions   - versions.yml recording the gatk4 and openjdk versions.
 */
process GATK4_MARKDUPLICATES_SPARK {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
    container 'broadinstitute/gatk:4.2.6.1'

    input:
    tuple val(meta), path(bam)
    path  fasta
    path  fasta_fai
    path  dict

    output:
    tuple val(meta), path("${prefix}"), emit: output
    tuple val(meta), path("*.metrics"), emit: metrics, optional: true
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Intentionally NOT declared with `def`: the output block above resolves
    // "${prefix}", so the variable must be visible outside the script scope.
    prefix = task.ext.prefix ?: "${meta.id}"
    // One `--input <file>` argument per staged BAM.
    def input_list = bam.collect{"--input $it"}.join(' ')

    // JVM heap size in megabytes. Whole-gigabyte sizing truncated fractional
    // requests (and produced an invalid `-Xmx0g` below 1 GB); handing the
    // entire allocation to the heap also left no room for non-heap JVM
    // memory, so only 80% of the task memory is given to -Xmx.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega * 0.8).intValue()
    }
    """
    gatk --java-options "-Xmx${avail_mem}M" MarkDuplicatesSpark \\
        $input_list \\
        --output $prefix \\
        --reference $fasta \\
        --spark-master local[${task.cpus}] \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
        openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
    END_VERSIONS
    """
}