MD finally supports cram output (#2019)

* MD finally supports cram output

* publish crai file

* fix test output

* fix mv

* figure out if contains breaks stuff

* remove variable checksum

* add contains check

* remove variable checksum

Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
FriederikeHanssen 2022-09-07 11:04:58 +02:00 committed by GitHub
parent 5e7b1ef9a5
commit e726b1730d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 81 additions and 17 deletions

View file

@ -9,20 +9,25 @@ process GATK4_MARKDUPLICATES {
input:
tuple val(meta), path(bam)
path fasta
path fasta_fai
output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , optional:true, emit: bai
tuple val(meta), path("*cram"), emit: cram, optional: true
tuple val(meta), path("*bam"), emit: bam, optional: true
tuple val(meta), path("*.crai"), emit: crai, optional: true
tuple val(meta), path("*.bai"), emit: bai, optional: true
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--INPUT $it"}.join(' ')
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
def avail_mem = 3
if (!task.memory) {
@ -33,11 +38,17 @@ process GATK4_MARKDUPLICATES {
"""
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
$input_list \\
--OUTPUT ${prefix}.bam \\
--OUTPUT ${prefix} \\
--METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\
${reference} \\
$args
# For CRAM output the index is written with a .bai suffix; rename it to .crai.
# Index creation is optional, so only rename when the index file actually exists;
# otherwise the mv would fail the whole task.
if [[ ${prefix} == *.cram ]] && [[ -f ${prefix}.bai ]]; then
    mv ${prefix}.bai ${prefix}.crai
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')

View file

@ -26,6 +26,14 @@ input:
type: file
description: Sorted BAM file
pattern: "*.{bam}"
- fasta:
type: file
description: Fasta file
pattern: "*.{fasta}"
- fasta_fai:
type: file
description: Fasta index file
pattern: "*.{fai}"
output:
- meta:
@ -41,6 +49,18 @@ output:
type: file
description: Marked duplicates BAM file
pattern: "*.{bam}"
- cram:
type: file
description: Marked duplicates CRAM file
pattern: "*.{cram}"
- bai:
type: file
description: BAM index file
pattern: "*.{bam.bai}"
- crai:
type: file
description: CRAM index file
pattern: "*.{cram.crai}"
- metrics:
type: file
description: Duplicate metrics file generated by GATK

View file

@ -3,13 +3,14 @@
nextflow.enable.dsl = 2
include { GATK4_MARKDUPLICATES } from '../../../../modules/gatk4/markduplicates/main.nf'
include { GATK4_MARKDUPLICATES as GATK4_MARKDUPLICATES_CRAM } from '../../../../modules/gatk4/markduplicates/main.nf'
workflow test_gatk4_markduplicates {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
]
GATK4_MARKDUPLICATES ( input )
GATK4_MARKDUPLICATES ( input, [], [] )
}
workflow test_gatk4_markduplicates_multiple_bams {
@ -18,5 +19,16 @@ workflow test_gatk4_markduplicates_multiple_bams {
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
] ]
GATK4_MARKDUPLICATES ( input )
GATK4_MARKDUPLICATES ( input, [], [] )
}
// Exercises the CRAM-configured alias of the module: two sorted BAMs go in
// together with a reference FASTA and its index.
workflow test_gatk4_markduplicates_multiple_cram_output {
    illumina_data = params.test_data['homo_sapiens']['illumina']
    genome_data   = params.test_data['homo_sapiens']['genome']

    // Both alignments are handed to the module as one list under a single meta map.
    sorted_bams = [
        file(illumina_data['test_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina_data['test2_paired_end_sorted_bam'], checkIfExists: true)
    ]
    bam_input = [ [ id:'test', single_end:false ], sorted_bams ]

    // Reference sequence and its .fai index, passed as the second and third inputs.
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)

    GATK4_MARKDUPLICATES_CRAM ( bam_input, reference_fasta, reference_fai )
}

View file

@ -4,6 +4,12 @@ process {
// BAM run: the module passes ext.prefix straight to --OUTPUT, so the prefix
// carries the file extension that selects the output format.
withName: GATK4_MARKDUPLICATES {
    ext.prefix = { "${meta.id}.bam" }
    // Ask MarkDuplicates to write an index next to the output.
    ext.args = '--CREATE_INDEX true'
}
// CRAM run: same module under its CRAM alias; the .cram suffix on the prefix
// is what the module's script checks before renaming the index to .crai.
withName: GATK4_MARKDUPLICATES_CRAM {
    ext.prefix = { "${meta.id}.cram" }
    // Ask MarkDuplicates to write an index next to the output.
    ext.args = '--CREATE_INDEX true'
}
}

View file

@ -1,25 +1,40 @@
- name: gatk4 markduplicates test_gatk4_markduplicates
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags:
- gatk4
- gatk4/markduplicates
files:
- path: output/gatk4/test.bai
md5sum: c8f7a9e426c768577f88f59cb1336bf3
md5sum: 4ac375b5288501c832bc3e4773f06d39
- path: output/gatk4/test.bam
md5sum: 58533ddab47f7ac07f7b10e7f4aac234
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml
md5sum: c1a0d25c5f65514e72aa490326557a88
- path: output/gatk4/test.bam.metrics
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags:
- gatk4
- gatk4/markduplicates
files:
- path: output/gatk4/test.bai
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
md5sum: 832fbc6b3c3266d024fc7131ffd1b45d
- path: output/gatk4/test.bam
md5sum: 112580c24b43331950f24f9adea30788
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml
md5sum: 36909e80bde1aa547fae6382fd35764d
- path: output/gatk4/test.bam.metrics
contains:
- "testN"
- "2820"
# CRAM-output case: runs the test_gatk4_markduplicates_multiple_cram_output entry workflow.
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_cram_output
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_cram_output -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags:
- gatk4
- gatk4/markduplicates
files:
# No md5sum is listed for the CRAM itself, so it is not compared by checksum.
- path: output/gatk4/test.cram
# The CRAM index is the only file in this case pinned by checksum.
- path: output/gatk4/test.cram.crai
md5sum: 53a329f0828210e317ad81b0e6ca4bc5
# The metrics file is checked for expected substrings instead of a checksum.
- path: output/gatk4/test.cram.metrics
contains:
- "testN"
- "2820"