MarkDuplicates (MD) finally supports CRAM output (#2019)

* MarkDuplicates (MD) finally supports CRAM output

* publish crai file

* fix test output

* fix mv

* figure out if the `contains` check breaks stuff

* remove variable checksum

* add contains check

* remove variable checksum

Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
FriederikeHanssen 2022-09-07 11:04:58 +02:00 committed by GitHub
parent 5e7b1ef9a5
commit e726b1730d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 81 additions and 17 deletions

View file

@ -9,20 +9,25 @@ process GATK4_MARKDUPLICATES {
input: input:
tuple val(meta), path(bam) tuple val(meta), path(bam)
path fasta
path fasta_fai
output: output:
tuple val(meta), path("*.bam") , emit: bam tuple val(meta), path("*cram"), emit: cram, optional: true
tuple val(meta), path("*.bai") , optional:true, emit: bai tuple val(meta), path("*bam"), emit: bam, optional: true
tuple val(meta), path("*.crai"), emit: crai, optional: true
tuple val(meta), path("*.bai"), emit: bai, optional: true
tuple val(meta), path("*.metrics"), emit: metrics tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions path "versions.yml", emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--INPUT $it"}.join(' ') def input_list = bam.collect{"--INPUT $it"}.join(' ')
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
def avail_mem = 3 def avail_mem = 3
if (!task.memory) { if (!task.memory) {
@ -33,11 +38,17 @@ process GATK4_MARKDUPLICATES {
""" """
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
$input_list \\ $input_list \\
--OUTPUT ${prefix}.bam \\ --OUTPUT ${prefix} \\
--METRICS_FILE ${prefix}.metrics \\ --METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\ --TMP_DIR . \\
${reference} \\
$args $args
if [[ ${prefix} == *.cram ]]; then
mv ${prefix}.bai ${prefix}.crai
fi
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')

View file

@ -26,6 +26,14 @@ input:
type: file type: file
description: Sorted BAM file description: Sorted BAM file
pattern: "*.{bam}" pattern: "*.{bam}"
- fasta:
type: file
description: Fasta file
pattern: "*.{fasta}"
- fasta_fai:
type: file
description: Fasta index file
pattern: "*.{fai}"
output: output:
- meta: - meta:
@ -41,6 +49,18 @@ output:
type: file type: file
description: Marked duplicates BAM file description: Marked duplicates BAM file
pattern: "*.{bam}" pattern: "*.{bam}"
- cram:
type: file
description: Marked duplicates CRAM file
pattern: "*.{cram}"
- bai:
type: file
description: BAM index file
pattern: "*.{bam.bai}"
- crai:
type: file
description: CRAM index file
pattern: "*.{cram.crai}"
- metrics: - metrics:
type: file type: file
description: Duplicate metrics file generated by GATK description: Duplicate metrics file generated by GATK

View file

@ -3,13 +3,14 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { GATK4_MARKDUPLICATES } from '../../../../modules/gatk4/markduplicates/main.nf' include { GATK4_MARKDUPLICATES } from '../../../../modules/gatk4/markduplicates/main.nf'
include { GATK4_MARKDUPLICATES as GATK4_MARKDUPLICATES_CRAM } from '../../../../modules/gatk4/markduplicates/main.nf'
workflow test_gatk4_markduplicates { workflow test_gatk4_markduplicates {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
] ]
GATK4_MARKDUPLICATES ( input ) GATK4_MARKDUPLICATES ( input, [], [] )
} }
workflow test_gatk4_markduplicates_multiple_bams { workflow test_gatk4_markduplicates_multiple_bams {
@ -18,5 +19,16 @@ workflow test_gatk4_markduplicates_multiple_bams {
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
] ] ] ]
GATK4_MARKDUPLICATES ( input ) GATK4_MARKDUPLICATES ( input, [], [] )
}
workflow test_gatk4_markduplicates_multiple_cram_output {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
GATK4_MARKDUPLICATES_CRAM ( input, fasta, fai )
} }

View file

@ -4,6 +4,12 @@ process {
withName: GATK4_MARKDUPLICATES { withName: GATK4_MARKDUPLICATES {
ext.args = '--CREATE_INDEX true' ext.args = '--CREATE_INDEX true'
ext.prefix = { "${meta.id}.bam" }
}
withName: GATK4_MARKDUPLICATES_CRAM {
ext.args = '--CREATE_INDEX true'
ext.prefix = { "${meta.id}.cram" }
} }
} }

View file

@ -1,25 +1,40 @@
- name: gatk4 markduplicates test_gatk4_markduplicates - name: gatk4 markduplicates test_gatk4_markduplicates
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags: tags:
- gatk4 - gatk4
- gatk4/markduplicates - gatk4/markduplicates
files: files:
- path: output/gatk4/test.bai - path: output/gatk4/test.bai
md5sum: c8f7a9e426c768577f88f59cb1336bf3 md5sum: 4ac375b5288501c832bc3e4773f06d39
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 58533ddab47f7ac07f7b10e7f4aac234 md5sum: c1a0d25c5f65514e72aa490326557a88
- path: output/gatk4/test.metrics - path: output/gatk4/test.bam.metrics
- path: output/gatk4/versions.yml
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams - name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags: tags:
- gatk4 - gatk4
- gatk4/markduplicates - gatk4/markduplicates
files: files:
- path: output/gatk4/test.bai - path: output/gatk4/test.bai
md5sum: bad71df9c876e72a5bc0a3e0fd755f92 md5sum: 832fbc6b3c3266d024fc7131ffd1b45d
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 112580c24b43331950f24f9adea30788 md5sum: 36909e80bde1aa547fae6382fd35764d
- path: output/gatk4/test.metrics - path: output/gatk4/test.bam.metrics
- path: output/gatk4/versions.yml contains:
- "testN"
- "2820"
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_cram_output
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_cram_output -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
tags:
- gatk4
- gatk4/markduplicates
files:
- path: output/gatk4/test.cram
- path: output/gatk4/test.cram.crai
md5sum: 53a329f0828210e317ad81b0e6ca4bc5
- path: output/gatk4/test.cram.metrics
contains:
- "testN"
- "2820"