mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-13 05:13:09 +00:00
Merge pull request #1700 from SusiJo/gatk_spark
Fix gatk/markduplicatesspark
This commit is contained in:
commit
c587fd1fe3
5 changed files with 87 additions and 26 deletions
|
@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||
'broadinstitute/gatk:4.2.3.0' }"
|
||||
|
@ -14,8 +14,9 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
path dict
|
||||
|
||||
output:
|
||||
tuple val(meta), path("${prefix}"), emit: output
|
||||
path "versions.yml" , emit: versions
|
||||
tuple val(meta), path("${prefix}"), emit: output
|
||||
tuple val(meta), path("*.metrics"), emit: metrics, optional: true
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
task.ext.when == null || task.ext.when
|
||||
|
@ -25,6 +26,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_list = bam.collect{"--input $it"}.join(' ')
|
||||
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -32,8 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
export SPARK_USER=spark3
|
||||
|
||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
|
||||
$input_list \\
|
||||
--output $prefix \\
|
||||
|
@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
|
|
|
@ -58,3 +58,4 @@ authors:
|
|||
- "@ajodeh-juma"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
- "@SusiJo"
|
||||
|
|
|
@ -3,26 +3,55 @@
|
|||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
|
||||
workflow test_gatk4_markduplicates_spark {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||
]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
|
||||
}
|
||||
|
|
|
@ -2,4 +2,18 @@ process {
|
|||
|
||||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||
|
||||
withName: GATK4_MARKDUPLICATES_SPARK {
|
||||
ext.prefix = { "${meta.id}.bam" }
|
||||
}
|
||||
withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
|
||||
ext.prefix = { "${meta.id}.cram" }
|
||||
}
|
||||
withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
|
||||
ext.args = '--metrics-file test.metrics'
|
||||
ext.prefix = { "${meta.id}.bam" }
|
||||
}
|
||||
|
||||
}
|
||||
// override tests/config/nextflow.config
|
||||
docker.userEmulation = false
|
||||
|
||||
|
|
|
@ -1,25 +1,41 @@
|
|||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark
|
||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bai
|
||||
md5sum: e9c125e82553209933883b4fe2b8d7c2
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
|
||||
- path: output/gatk4/test.metrics
|
||||
md5sum: dc1a09ac6371aab7c50d1a554baa06d3
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
|
||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bai
|
||||
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 8187febc6108ffef7f907e89b9c091a4
|
||||
- path: output/gatk4/test.metrics
|
||||
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.cram
|
||||
md5sum: 2271016de5e4199736598f39d12d7587
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||
- path: output/gatk4/test.metrics
|
||||
contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
|
||||
- path: output/gatk4/versions.yml
|
||||
|
|
Loading…
Reference in a new issue