nf-core_modules/modules/gatk4/markduplicates/main.nf
FriederikeHanssen ac1e6df076
Update to allow cram + update needed to use the gatk4 modules in sarek (#976)
* Make samtools/merge cram compliant

* samtools/stats cram compliance

* update yml file

* samtools/view to deal with crams

* Update tests to make sure cram works

* also fix tmp dir and min mem in one go

* basequalityrecal test for cram + min mem + tmpdir

* update haplotypecaller for sarek

* update haplotype yml

* update markdup to allow multiple bams, take out params to be passed with options.args

* remove TODO statement

* Remove variable md5sum

* add empty input to stats module in subworkflows

* subworkflows seem to work now on my side

* Apply code review

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>

* replace bam with input to be more inclusive

* rename everywhere

* rename input

* remove variable checksum

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
2021-10-29 13:01:05 +02:00

53 lines
1.9 KiB
Text

// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Default the module options to an empty map.
// NOTE(review): presumably the including workflow overrides this via addParams — confirm against callers.
params.options = [:]
// Build the effective options object with initOptions (imported from ./functions).
// The process below reads options.suffix and options.args from it.
options = initOptions(params.options)
/*
 * GATK4_MARKDUPLICATES
 *
 * Runs `gatk MarkDuplicates` over one or more input alignment files of a sample.
 *
 * Input:
 *   meta - sample metadata map; meta.id is used as the task tag and output prefix
 *   bams - input file(s); every file is handed to GATK as its own --INPUT argument
 *
 * Output:
 *   bam      - files matching *.bam (the command writes ${prefix}.bam)
 *   bai      - files matching *.bai (index requested with --CREATE_INDEX true)
 *   metrics  - files matching *.metrics (the command writes ${prefix}.metrics)
 *   versions - versions.yml recording the GATK version reported by `gatk --version`
 *
 * Extra command-line arguments are appended verbatim from options.args.
 */
process GATK4_MARKDUPLICATES {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
    // Use the Singularity image directly unless the user asked to pull the Docker image instead
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0"
    } else {
        container "quay.io/biocontainers/gatk4:4.2.0.0--0"
    }

    input:
    tuple val(meta), path(bams)

    output:
    tuple val(meta), path("*.bam")    , emit: bam
    tuple val(meta), path("*.bai")    , emit: bai
    tuple val(meta), path("*.metrics"), emit: metrics
    path "versions.yml"               , emit: versions

    script:
    // Output file stem: sample id plus the optional suffix from the module options
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // One "--INPUT <file>" argument per input file, space separated
    def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")

    // JVM heap size in GB: taken from the task's memory request, 3 GB when none is set
    def avail_mem = 3
    if (!task.memory) {
        log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }
    // The heap limit is passed through --java-options so the JVM respects the task allocation
    """
    gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
        $bam_list \\
        --METRICS_FILE ${prefix}.metrics \\
        --TMP_DIR . \\
        --CREATE_INDEX true \\
        --OUTPUT ${prefix}.bam \\
        $options.args

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}