mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-10 20:23:10 +00:00
MD finally supports cram output (#2019)
* MD finally supports cram output
* publish crai file
* fix test output
* fix mv
* figure out if contains breaks stuff
* remove variable checksum
* add contains check
* remove variable checksum

Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
This commit is contained in:
parent
5e7b1ef9a5
commit
e726b1730d
5 changed files with 81 additions and 17 deletions
|
@ -9,20 +9,25 @@ process GATK4_MARKDUPLICATES {
|
||||||
|
|
||||||
input:
|
input:
|
||||||
tuple val(meta), path(bam)
|
tuple val(meta), path(bam)
|
||||||
|
path fasta
|
||||||
|
path fasta_fai
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path("*.bam") , emit: bam
|
tuple val(meta), path("*cram"), emit: cram, optional: true
|
||||||
tuple val(meta), path("*.bai") , optional:true, emit: bai
|
tuple val(meta), path("*bam"), emit: bam, optional: true
|
||||||
|
tuple val(meta), path("*.crai"), emit: crai, optional: true
|
||||||
|
tuple val(meta), path("*.bai"), emit: bai, optional: true
|
||||||
tuple val(meta), path("*.metrics"), emit: metrics
|
tuple val(meta), path("*.metrics"), emit: metrics
|
||||||
path "versions.yml" , emit: versions
|
path "versions.yml", emit: versions
|
||||||
|
|
||||||
when:
|
when:
|
||||||
task.ext.when == null || task.ext.when
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
script:
|
script:
|
||||||
def args = task.ext.args ?: ''
|
def args = task.ext.args ?: ''
|
||||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
def input_list = bam.collect{"--INPUT $it"}.join(' ')
|
def input_list = bam.collect{"--INPUT $it"}.join(' ')
|
||||||
|
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
|
||||||
|
|
||||||
def avail_mem = 3
|
def avail_mem = 3
|
||||||
if (!task.memory) {
|
if (!task.memory) {
|
||||||
|
@ -33,11 +38,17 @@ process GATK4_MARKDUPLICATES {
|
||||||
"""
|
"""
|
||||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
|
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
|
||||||
$input_list \\
|
$input_list \\
|
||||||
--OUTPUT ${prefix}.bam \\
|
--OUTPUT ${prefix} \\
|
||||||
--METRICS_FILE ${prefix}.metrics \\
|
--METRICS_FILE ${prefix}.metrics \\
|
||||||
--TMP_DIR . \\
|
--TMP_DIR . \\
|
||||||
|
${reference} \\
|
||||||
$args
|
$args
|
||||||
|
|
||||||
|
|
||||||
|
if [[ ${prefix} == *.cram ]]; then
|
||||||
|
mv ${prefix}.bai ${prefix}.crai
|
||||||
|
fi
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
"${task.process}":
|
"${task.process}":
|
||||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||||
|
|
|
@ -26,6 +26,14 @@ input:
|
||||||
type: file
|
type: file
|
||||||
description: Sorted BAM file
|
description: Sorted BAM file
|
||||||
pattern: "*.{bam}"
|
pattern: "*.{bam}"
|
||||||
|
- fasta:
|
||||||
|
type: file
|
||||||
|
description: Fasta file
|
||||||
|
pattern: "*.{fasta}"
|
||||||
|
- fasta_fai:
|
||||||
|
type: file
|
||||||
|
description: Fasta index file
|
||||||
|
pattern: "*.{fai}"
|
||||||
|
|
||||||
output:
|
output:
|
||||||
- meta:
|
- meta:
|
||||||
|
@ -41,6 +49,18 @@ output:
|
||||||
type: file
|
type: file
|
||||||
description: Marked duplicates BAM file
|
description: Marked duplicates BAM file
|
||||||
pattern: "*.{bam}"
|
pattern: "*.{bam}"
|
||||||
|
- cram:
|
||||||
|
type: file
|
||||||
|
description: Marked duplicates CRAM file
|
||||||
|
pattern: "*.{cram}"
|
||||||
|
- bai:
|
||||||
|
type: file
|
||||||
|
description: BAM index file
|
||||||
|
pattern: "*.{bam.bai}"
|
||||||
|
- crai:
|
||||||
|
type: file
|
||||||
|
description: CRAM index file
|
||||||
|
pattern: "*.{cram.crai}"
|
||||||
- metrics:
|
- metrics:
|
||||||
type: file
|
type: file
|
||||||
description: Duplicate metrics file generated by GATK
|
description: Duplicate metrics file generated by GATK
|
||||||
|
|
|
@ -3,13 +3,14 @@
|
||||||
nextflow.enable.dsl = 2
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
include { GATK4_MARKDUPLICATES } from '../../../../modules/gatk4/markduplicates/main.nf'
|
include { GATK4_MARKDUPLICATES } from '../../../../modules/gatk4/markduplicates/main.nf'
|
||||||
|
include { GATK4_MARKDUPLICATES as GATK4_MARKDUPLICATES_CRAM } from '../../../../modules/gatk4/markduplicates/main.nf'
|
||||||
|
|
||||||
workflow test_gatk4_markduplicates {
|
workflow test_gatk4_markduplicates {
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
]
|
]
|
||||||
|
|
||||||
GATK4_MARKDUPLICATES ( input )
|
GATK4_MARKDUPLICATES ( input, [], [] )
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow test_gatk4_markduplicates_multiple_bams {
|
workflow test_gatk4_markduplicates_multiple_bams {
|
||||||
|
@ -18,5 +19,16 @@ workflow test_gatk4_markduplicates_multiple_bams {
|
||||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
] ]
|
] ]
|
||||||
|
|
||||||
GATK4_MARKDUPLICATES ( input )
|
GATK4_MARKDUPLICATES ( input, [], [] )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_gatk4_markduplicates_multiple_cram_output {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
|
||||||
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
|
] ]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK4_MARKDUPLICATES_CRAM ( input, fasta, fai )
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,12 @@ process {
|
||||||
|
|
||||||
withName: GATK4_MARKDUPLICATES {
|
withName: GATK4_MARKDUPLICATES {
|
||||||
ext.args = '--CREATE_INDEX true'
|
ext.args = '--CREATE_INDEX true'
|
||||||
|
ext.prefix = { "${meta.id}.bam" }
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: GATK4_MARKDUPLICATES_CRAM {
|
||||||
|
ext.args = '--CREATE_INDEX true'
|
||||||
|
ext.prefix = { "${meta.id}.cram" }
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,40 @@
|
||||||
- name: gatk4 markduplicates test_gatk4_markduplicates
|
- name: gatk4 markduplicates test_gatk4_markduplicates
|
||||||
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
|
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- gatk4
|
- gatk4
|
||||||
- gatk4/markduplicates
|
- gatk4/markduplicates
|
||||||
files:
|
files:
|
||||||
- path: output/gatk4/test.bai
|
- path: output/gatk4/test.bai
|
||||||
md5sum: c8f7a9e426c768577f88f59cb1336bf3
|
md5sum: 4ac375b5288501c832bc3e4773f06d39
|
||||||
- path: output/gatk4/test.bam
|
- path: output/gatk4/test.bam
|
||||||
md5sum: 58533ddab47f7ac07f7b10e7f4aac234
|
md5sum: c1a0d25c5f65514e72aa490326557a88
|
||||||
- path: output/gatk4/test.metrics
|
- path: output/gatk4/test.bam.metrics
|
||||||
- path: output/gatk4/versions.yml
|
|
||||||
|
|
||||||
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
|
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
|
||||||
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
|
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- gatk4
|
- gatk4
|
||||||
- gatk4/markduplicates
|
- gatk4/markduplicates
|
||||||
files:
|
files:
|
||||||
- path: output/gatk4/test.bai
|
- path: output/gatk4/test.bai
|
||||||
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
|
md5sum: 832fbc6b3c3266d024fc7131ffd1b45d
|
||||||
- path: output/gatk4/test.bam
|
- path: output/gatk4/test.bam
|
||||||
md5sum: 112580c24b43331950f24f9adea30788
|
md5sum: 36909e80bde1aa547fae6382fd35764d
|
||||||
- path: output/gatk4/test.metrics
|
- path: output/gatk4/test.bam.metrics
|
||||||
- path: output/gatk4/versions.yml
|
contains:
|
||||||
|
- "testN"
|
||||||
|
- "2820"
|
||||||
|
|
||||||
|
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_cram_output
|
||||||
|
command: nextflow run ./tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_cram_output -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
|
||||||
|
tags:
|
||||||
|
- gatk4
|
||||||
|
- gatk4/markduplicates
|
||||||
|
files:
|
||||||
|
- path: output/gatk4/test.cram
|
||||||
|
- path: output/gatk4/test.cram.crai
|
||||||
|
md5sum: 53a329f0828210e317ad81b0e6ca4bc5
|
||||||
|
- path: output/gatk4/test.cram.metrics
|
||||||
|
contains:
|
||||||
|
- "testN"
|
||||||
|
- "2820"
|
||||||
|
|
Loading…
Reference in a new issue