nf-core_modules/modules/gatk4/markduplicates/main.nf
FriederikeHanssen e726b1730d
MD finally supports cram output (#2019)
* MD finally supports cram output

* publish crai file

* fix test output

* fix mv

* figure out if contains breaks stuff

* remove variable checksum

* add contains check

* remove variable checksum

Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
2022-09-07 11:04:58 +02:00

57 lines
1.8 KiB
Text

// Runs GATK4 MarkDuplicates over the alignment file(s) of one sample.
//
// Inputs:
//   meta      - sample metadata map; meta.id is the task tag and the default output prefix
//   bam       - alignment file(s); each one is passed as its own --INPUT argument
//   fasta     - reference FASTA; when set it is passed as --REFERENCE_SEQUENCE
//   fasta_fai - FASTA index; staged next to the FASTA but never named on the command line
//
// Outputs:
//   cram / bam  - files matching "*cram" / "*bam" (optional)
//   crai / bai  - index files matching "*.crai" / "*.bai" (optional)
//   metrics     - the duplication metrics file, "${prefix}.metrics"
//   versions    - versions.yml recording the GATK version
//
// `prefix` is handed to --OUTPUT verbatim, so it is expected to carry the file
// extension itself (the script tests it for a trailing ".cram").
process GATK4_MARKDUPLICATES {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(bam)
    path  fasta
    path  fasta_fai

    output:
    tuple val(meta), path("*cram"),     emit: cram,  optional: true
    tuple val(meta), path("*bam"),      emit: bam,   optional: true
    tuple val(meta), path("*.crai"),    emit: crai,  optional: true
    tuple val(meta), path("*.bai"),     emit: bai,   optional: true
    tuple val(meta), path("*.metrics"), emit: metrics
    path "versions.yml",                emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // NOTE(review): assigned without `def`, as in the original; presumably so the
    // value stays visible outside the script closure - confirm before making it local.
    prefix   = task.ext.prefix ?: "${meta.id}"
    // One --INPUT flag per staged alignment file.
    def input_list = bam.collect{"--INPUT $it"}.join(' ')
    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""

    // Java heap size in GB: the task's memory request, or 3 when none was set.
    def avail_mem = 3
    if (!task.memory) {
        log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }

    // For CRAM output the index written as "${prefix}.bai" is renamed to
    // "${prefix}.crai". The rename is guarded by a file-existence test because the
    // index outputs are optional: an unconditional `mv` on a missing file would
    // fail the task even though MarkDuplicates itself succeeded.
    // NOTE(review): the index is presumably only produced when index creation is
    // requested through ext.args - confirm.
    """
    gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
        $input_list \\
        --OUTPUT ${prefix} \\
        --METRICS_FILE ${prefix}.metrics \\
        --TMP_DIR . \\
        ${reference} \\
        $args

    if [[ "${prefix}" == *.cram && -f "${prefix}.bai" ]]; then
        mv "${prefix}.bai" "${prefix}.crai"
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}