mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-14 05:43:08 +00:00
Merge pull request #1700 from SusiJo/gatk_spark
Fix gatk/markduplicatesspark
This commit is contained in:
commit
c587fd1fe3
5 changed files with 87 additions and 26 deletions
|
@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
||||||
tag "$meta.id"
|
tag "$meta.id"
|
||||||
label 'process_high'
|
label 'process_high'
|
||||||
|
|
||||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null)
|
||||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||||
'broadinstitute/gatk:4.2.3.0' }"
|
'broadinstitute/gatk:4.2.3.0' }"
|
||||||
|
@ -14,8 +14,9 @@ process GATK4_MARKDUPLICATES_SPARK {
|
||||||
path dict
|
path dict
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path("${prefix}"), emit: output
|
tuple val(meta), path("${prefix}"), emit: output
|
||||||
path "versions.yml" , emit: versions
|
tuple val(meta), path("*.metrics"), emit: metrics, optional: true
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
when:
|
when:
|
||||||
task.ext.when == null || task.ext.when
|
task.ext.when == null || task.ext.when
|
||||||
|
@ -25,6 +26,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
||||||
prefix = task.ext.prefix ?: "${meta.id}"
|
prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
def input_list = bam.collect{"--input $it"}.join(' ')
|
def input_list = bam.collect{"--input $it"}.join(' ')
|
||||||
|
|
||||||
|
|
||||||
def avail_mem = 3
|
def avail_mem = 3
|
||||||
if (!task.memory) {
|
if (!task.memory) {
|
||||||
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||||
|
@ -32,8 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK {
|
||||||
avail_mem = task.memory.giga
|
avail_mem = task.memory.giga
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
export SPARK_USER=spark3
|
|
||||||
|
|
||||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
|
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
|
||||||
$input_list \\
|
$input_list \\
|
||||||
--output $prefix \\
|
--output $prefix \\
|
||||||
|
@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
"${task.process}":
|
"${task.process}":
|
||||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||||
|
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
|
||||||
END_VERSIONS
|
END_VERSIONS
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,3 +58,4 @@ authors:
|
||||||
- "@ajodeh-juma"
|
- "@ajodeh-juma"
|
||||||
- "@FriederikeHanssen"
|
- "@FriederikeHanssen"
|
||||||
- "@maxulysse"
|
- "@maxulysse"
|
||||||
|
- "@SusiJo"
|
||||||
|
|
|
@ -3,26 +3,55 @@
|
||||||
nextflow.enable.dsl = 2
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||||
|
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||||
|
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||||
|
|
||||||
workflow test_gatk4_markduplicates_spark {
|
workflow test_gatk4_markduplicates_spark {
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
]
|
]
|
||||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// chr 22
|
||||||
workflow test_gatk4_markduplicates_spark_multiple_bams {
|
workflow test_gatk4_markduplicates_spark_multiple_bams {
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||||
] ]
|
] ]
|
||||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// chr 22
|
||||||
|
workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||||
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||||
|
] ]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
|
||||||
|
}
|
||||||
|
|
||||||
|
// chr 22
|
||||||
|
workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||||
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||||
|
] ]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
|
||||||
|
}
|
||||||
|
|
|
@ -2,4 +2,18 @@ process {
|
||||||
|
|
||||||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||||
|
|
||||||
|
withName: GATK4_MARKDUPLICATES_SPARK {
|
||||||
|
ext.prefix = { "${meta.id}.bam" }
|
||||||
|
}
|
||||||
|
withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
|
||||||
|
ext.prefix = { "${meta.id}.cram" }
|
||||||
|
}
|
||||||
|
withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
|
||||||
|
ext.args = '--metrics-file test.metrics'
|
||||||
|
ext.prefix = { "${meta.id}.bam" }
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
// override tests/config/nextflow.config
|
||||||
|
docker.userEmulation = false
|
||||||
|
|
||||||
|
|
|
@ -1,25 +1,41 @@
|
||||||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark
|
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
|
||||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- gatk4
|
- gatk4
|
||||||
- gatk4/markduplicatesspark
|
- gatk4/markduplicatesspark
|
||||||
files:
|
files:
|
||||||
- path: output/gatk4/test.bai
|
|
||||||
md5sum: e9c125e82553209933883b4fe2b8d7c2
|
|
||||||
- path: output/gatk4/test.bam
|
- path: output/gatk4/test.bam
|
||||||
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
|
md5sum: dc1a09ac6371aab7c50d1a554baa06d3
|
||||||
- path: output/gatk4/test.metrics
|
|
||||||
- path: output/gatk4/versions.yml
|
- path: output/gatk4/versions.yml
|
||||||
|
|
||||||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
|
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
|
||||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- gatk4
|
- gatk4
|
||||||
- gatk4/markduplicatesspark
|
- gatk4/markduplicatesspark
|
||||||
files:
|
files:
|
||||||
- path: output/gatk4/test.bai
|
|
||||||
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
|
|
||||||
- path: output/gatk4/test.bam
|
- path: output/gatk4/test.bam
|
||||||
md5sum: 8187febc6108ffef7f907e89b9c091a4
|
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||||
- path: output/gatk4/test.metrics
|
- path: output/gatk4/versions.yml
|
||||||
|
|
||||||
|
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
|
||||||
|
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||||
|
tags:
|
||||||
|
- gatk4
|
||||||
|
- gatk4/markduplicatesspark
|
||||||
|
files:
|
||||||
|
- path: output/gatk4/test.cram
|
||||||
|
md5sum: 2271016de5e4199736598f39d12d7587
|
||||||
|
- path: output/gatk4/versions.yml
|
||||||
|
|
||||||
|
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
|
||||||
|
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||||
|
tags:
|
||||||
|
- gatk4
|
||||||
|
- gatk4/markduplicatesspark
|
||||||
|
files:
|
||||||
|
- path: output/gatk4/test.bam
|
||||||
|
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||||
|
- path: output/gatk4/test.metrics
|
||||||
|
contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
|
||||||
- path: output/gatk4/versions.yml
|
- path: output/gatk4/versions.yml
|
||||||
|
|
Loading…
Reference in a new issue