From 7389963d5cb18f81c10dff128c510e518ee4f0f6 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Tue, 7 Dec 2021 15:22:24 +0100 Subject: [PATCH] Add memory stuff to all gatk4 modules (#1122) * Add memory stuff to all gatk4 modules * Add removed input line back in * revert script section --- modules/gatk4/applybqsr/main.nf | 3 ++- modules/gatk4/baserecalibrator/main.nf | 4 ++-- modules/gatk4/bedtointervallist/main.nf | 8 +++++++- modules/gatk4/calculatecontamination/main.nf | 8 +++++++- modules/gatk4/createsequencedictionary/main.nf | 2 +- modules/gatk4/createsomaticpanelofnormals/main.nf | 8 +++++++- modules/gatk4/estimatelibrarycomplexity/main.nf | 2 +- modules/gatk4/fastqtosam/main.nf | 8 +++++++- modules/gatk4/filtermutectcalls/main.nf | 8 +++++++- modules/gatk4/genomicsdbimport/main.nf | 8 +++++++- modules/gatk4/genotypegvcfs/main.nf | 8 +++++++- modules/gatk4/getpileupsummaries/main.nf | 8 +++++++- modules/gatk4/indexfeaturefile/main.nf | 8 +++++++- modules/gatk4/intervallisttools/main.nf | 8 +++++++- modules/gatk4/learnreadorientationmodel/main.nf | 8 +++++++- modules/gatk4/markduplicates/main.nf | 6 +++--- modules/gatk4/mergebamalignment/main.nf | 8 +++++++- modules/gatk4/mergevcfs/main.nf | 8 +++++++- modules/gatk4/mutect2/main.nf | 8 +++++++- modules/gatk4/revertsam/main.nf | 8 +++++++- modules/gatk4/samtofastq/main.nf | 8 +++++++- modules/gatk4/splitncigarreads/main.nf | 8 +++++++- modules/gatk4/variantfiltration/main.nf | 2 +- 23 files changed, 129 insertions(+), 26 deletions(-) diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index bd428d6c..3cc69ddf 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -22,13 +22,14 @@ process GATK4_APPLYBQSR { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + def avail_mem = 3 if (!task.memory) { log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } """ - gatk ApplyBQSR \\ + gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\ -R $fasta \\ -I $input \\ --bqsr-recal-file $bqsr_table \\ diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index 9b0bf286..17b37943 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -25,14 +25,14 @@ process GATK4_BASERECALIBRATOR { def prefix = task.ext.prefix ?: "${meta.id}" def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') - + def avail_mem = 3 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } """ - gatk BaseRecalibrator \ + gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ -R $fasta \ -I $input \ $sitesCommand \ diff --git a/modules/gatk4/bedtointervallist/main.nf b/modules/gatk4/bedtointervallist/main.nf index c4538034..2f6266b9 100644 --- a/modules/gatk4/bedtointervallist/main.nf +++ b/modules/gatk4/bedtointervallist/main.nf @@ -18,8 +18,14 @@ process GATK4_BEDTOINTERVALLIST { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\ -I $bed \\ -SD $sequence_dict \\ -O ${prefix}.interval_list \\ diff --git a/modules/gatk4/calculatecontamination/main.nf b/modules/gatk4/calculatecontamination/main.nf index 7c112c3c..8840356a 100644 --- a/modules/gatk4/calculatecontamination/main.nf +++ b/modules/gatk4/calculatecontamination/main.nf @@ -21,8 +21,14 @@ process GATK4_CALCULATECONTAMINATION { def prefix = task.ext.prefix ?: "${meta.id}" def matched_command = matched ? " -matched ${matched} " : '' def segment_command = segmentout ? " -segments ${prefix}.segmentation.table" : '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk CalculateContamination \\ + gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\ -I $pileup \\ $matched_command \\ -O ${prefix}.contamination.table \\ diff --git a/modules/gatk4/createsequencedictionary/main.nf b/modules/gatk4/createsequencedictionary/main.nf index 8d001856..e8f32106 100644 --- a/modules/gatk4/createsequencedictionary/main.nf +++ b/modules/gatk4/createsequencedictionary/main.nf @@ -18,7 +18,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { def args = task.ext.args ?: '' def avail_mem = 6 if (!task.memory) { - log.info '[GATK] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' 
} else { avail_mem = task.memory.giga } diff --git a/modules/gatk4/createsomaticpanelofnormals/main.nf b/modules/gatk4/createsomaticpanelofnormals/main.nf index 2860e82e..ff345f75 100644 --- a/modules/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/gatk4/createsomaticpanelofnormals/main.nf @@ -21,8 +21,14 @@ process GATK4_CREATESOMATICPANELOFNORMALS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk \\ + gatk --java-options "-Xmx${avail_mem}g" \\ CreateSomaticPanelOfNormals \\ -R $fasta \\ -V gendb://$genomicsdb \\ diff --git a/modules/gatk4/estimatelibrarycomplexity/main.nf b/modules/gatk4/estimatelibrarycomplexity/main.nf index f636dc46..c17dba09 100644 --- a/modules/gatk4/estimatelibrarycomplexity/main.nf +++ b/modules/gatk4/estimatelibrarycomplexity/main.nf @@ -29,7 +29,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { avail_mem = task.memory.giga } """ - gatk EstimateLibraryComplexity \ + gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \ ${crams} \ -O ${prefix}.metrics \ --REFERENCE_SEQUENCE ${fasta} \ diff --git a/modules/gatk4/fastqtosam/main.nf b/modules/gatk4/fastqtosam/main.nf index 915eb996..a55ba709 100644 --- a/modules/gatk4/fastqtosam/main.nf +++ b/modules/gatk4/fastqtosam/main.nf @@ -18,8 +18,14 @@ process GATK4_FASTQTOSAM { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def read_files = meta.single_end ? "-F1 $reads" : "-F1 ${reads[0]} -F2 ${reads[1]}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK FastqToSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk FastqToSam \\ + gatk --java-options "-Xmx${avail_mem}g" FastqToSam \\ $read_files \\ -O ${prefix}.bam \\ -SM $prefix \\ diff --git a/modules/gatk4/filtermutectcalls/main.nf b/modules/gatk4/filtermutectcalls/main.nf index 02fa804f..6a1d9b3a 100644 --- a/modules/gatk4/filtermutectcalls/main.nf +++ b/modules/gatk4/filtermutectcalls/main.nf @@ -37,8 +37,14 @@ process GATK4_FILTERMUTECTCALLS { if (contaminationfile) { contamination_options = '--contamination-table ' + contaminationfile.join(' --contamination-table ') } + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk FilterMutectCalls \\ + gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\ -R $fasta \\ -V $vcf \\ $orientationbias_options \\ diff --git a/modules/gatk4/genomicsdbimport/main.nf b/modules/gatk4/genomicsdbimport/main.nf index e794aa5a..2751173b 100644 --- a/modules/gatk4/genomicsdbimport/main.nf +++ b/modules/gatk4/genomicsdbimport/main.nf @@ -42,8 +42,14 @@ process GATK4_GENOMICSDBIMPORT { updated_db = wspace.toString() } + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk GenomicsDBImport \\ + gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\ $inputs_command \\ $dir_command \\ $intervals_command \\ diff --git a/modules/gatk4/genotypegvcfs/main.nf b/modules/gatk4/genotypegvcfs/main.nf index f0b35447..1a772860 100644 --- a/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/gatk4/genotypegvcfs/main.nf @@ -26,8 +26,14 @@ process GATK4_GENOTYPEGVCFS { def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" def interval_options = intervals_bed ? 
"-L ${intervals_bed}" : "" def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk \\ + gatk --java-options "-Xmx${avail_mem}g" \\ GenotypeGVCFs \\ $args \\ $interval_options \\ diff --git a/modules/gatk4/getpileupsummaries/main.nf b/modules/gatk4/getpileupsummaries/main.nf index 99be601f..361974e8 100644 --- a/modules/gatk4/getpileupsummaries/main.nf +++ b/modules/gatk4/getpileupsummaries/main.nf @@ -24,8 +24,14 @@ process GATK4_GETPILEUPSUMMARIES { sitesCommand = sites ? " -L ${sites} " : " -L ${variants} " + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GetPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk GetPileupSummaries \\ + gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\ -I $bam \\ -V $variants \\ $sitesCommand \\ diff --git a/modules/gatk4/indexfeaturefile/main.nf b/modules/gatk4/indexfeaturefile/main.nf index d33e030c..cc6c663e 100644 --- a/modules/gatk4/indexfeaturefile/main.nf +++ b/modules/gatk4/indexfeaturefile/main.nf @@ -16,8 +16,14 @@ process GATK4_INDEXFEATUREFILE { script: def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk \\ + gatk --java-options "-Xmx${avail_mem}g" \\ IndexFeatureFile \\ $args \\ -I $feature_file diff --git a/modules/gatk4/intervallisttools/main.nf b/modules/gatk4/intervallisttools/main.nf index 7e1a47f7..b813d844 100644 --- a/modules/gatk4/intervallisttools/main.nf +++ b/modules/gatk4/intervallisttools/main.nf @@ -17,11 +17,17 @@ process GATK4_INTERVALLISTTOOLS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ mkdir ${prefix}_split - gatk \\ + gatk --java-options "-Xmx${avail_mem}g" \\ IntervalListTools \\ -I ${interval_list} \\ -O ${prefix}_split \\ diff --git a/modules/gatk4/learnreadorientationmodel/main.nf b/modules/gatk4/learnreadorientationmodel/main.nf index ac021afa..0c2f09d2 100644 --- a/modules/gatk4/learnreadorientationmodel/main.nf +++ b/modules/gatk4/learnreadorientationmodel/main.nf @@ -19,8 +19,14 @@ process GATK4_LEARNREADORIENTATIONMODEL { def prefix = task.ext.prefix ?: "${meta.id}" def inputs_list = [] f1r2.each() { a -> inputs_list.add(" -I " + a) } + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk \\ + gatk --java-options "-Xmx${avail_mem}g" \\ LearnReadOrientationModel \\ ${inputs_list.join(' ')} \\ -O ${prefix}.tar.gz \\ diff --git a/modules/gatk4/markduplicates/main.nf b/modules/gatk4/markduplicates/main.nf index a109facc..8bdb2c0a 100644 --- a/modules/gatk4/markduplicates/main.nf +++ b/modules/gatk4/markduplicates/main.nf @@ -20,14 +20,14 @@ process GATK4_MARKDUPLICATES { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") - def avail_mem = 3 + def avail_mem = 3 if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } """ - gatk MarkDuplicates \\ + gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ $bam_list \\ --METRICS_FILE ${prefix}.metrics \\ --TMP_DIR . \\ diff --git a/modules/gatk4/mergebamalignment/main.nf b/modules/gatk4/mergebamalignment/main.nf index 5e552cb2..a0f54976 100644 --- a/modules/gatk4/mergebamalignment/main.nf +++ b/modules/gatk4/mergebamalignment/main.nf @@ -20,8 +20,14 @@ process GATK4_MERGEBAMALIGNMENT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MergeBamAlignment] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk MergeBamAlignment \\ + gatk --java-options "-Xmx${avail_mem}g" MergeBamAlignment \\ ALIGNED=$aligned \\ UNMAPPED=$unmapped \\ R=$fasta \\ diff --git a/modules/gatk4/mergevcfs/main.nf b/modules/gatk4/mergevcfs/main.nf index cd1840c3..1fcce485 100644 --- a/modules/gatk4/mergevcfs/main.nf +++ b/modules/gatk4/mergevcfs/main.nf @@ -26,8 +26,14 @@ process GATK4_MERGEVCFS { input += " I=${vcf}" } def ref = use_ref_dict ? "D=${ref_dict}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk MergeVcfs \\ + gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ $input \\ O=${prefix}.vcf.gz \\ $ref \\ diff --git a/modules/gatk4/mutect2/main.nf b/modules/gatk4/mutect2/main.nf index 2cf940de..414c7705 100644 --- a/modules/gatk4/mutect2/main.nf +++ b/modules/gatk4/mutect2/main.nf @@ -53,8 +53,14 @@ process GATK4_MUTECT2 { normals_command = '-normal ' + which_norm.join( ' -normal ') } + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk Mutect2 \\ + gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\ -R ${fasta} \\ ${inputs_command} \\ ${normals_command} \\ diff --git a/modules/gatk4/revertsam/main.nf b/modules/gatk4/revertsam/main.nf index 638b7705..0713d7ca 100644 --- a/modules/gatk4/revertsam/main.nf +++ b/modules/gatk4/revertsam/main.nf @@ -17,8 +17,14 @@ process GATK4_REVERTSAM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK RevertSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk RevertSam \\ + gatk --java-options "-Xmx${avail_mem}g" RevertSam \\ I=$bam \\ O=${prefix}.reverted.bam \\ $args diff --git a/modules/gatk4/samtofastq/main.nf b/modules/gatk4/samtofastq/main.nf index a909f540..0afb7ef3 100644 --- a/modules/gatk4/samtofastq/main.nf +++ b/modules/gatk4/samtofastq/main.nf @@ -18,8 +18,14 @@ process GATK4_SAMTOFASTQ { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def output = meta.single_end ? "FASTQ=${prefix}.fastq.gz" : "FASTQ=${prefix}_1.fastq.gz SECOND_END_FASTQ=${prefix}_2.fastq.gz" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK SamToFastq] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ - gatk SamToFastq \\ + gatk --java-options "-Xmx${avail_mem}g" SamToFastq \\ I=$bam \\ $output \\ $args diff --git a/modules/gatk4/splitncigarreads/main.nf b/modules/gatk4/splitncigarreads/main.nf index 65b82a35..6daed954 100644 --- a/modules/gatk4/splitncigarreads/main.nf +++ b/modules/gatk4/splitncigarreads/main.nf @@ -20,8 +20,14 @@ process GATK4_SPLITNCIGARREADS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } """ - gatk SplitNCigarReads \\ + gatk --java-options "-Xmx${avail_mem}g" SplitNCigarReads \\ -R $fasta \\ -I $bam \\ -O ${prefix}.bam \\ diff --git a/modules/gatk4/variantfiltration/main.nf b/modules/gatk4/variantfiltration/main.nf index 00dc2588..efe245cc 100644 --- a/modules/gatk4/variantfiltration/main.nf +++ b/modules/gatk4/variantfiltration/main.nf @@ -23,7 +23,7 @@ process GATK4_VARIANTFILTRATION { def prefix = task.ext.prefix ?: "${meta.id}" def avail_mem = 3 if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.toGiga() }