Merge pull request #1541 from matthdsm/subwf/bam_qc

New subworkflow:  bam_qc_picard
This commit is contained in:
Matthias De Smet 2022-04-29 14:31:38 +02:00 committed by GitHub
commit 0ec3dea37f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 166 additions and 0 deletions

View file

@ -0,0 +1,41 @@
//
// Run QC steps on BAM/CRAM files using Picard
//
include { PICARD_COLLECTMULTIPLEMETRICS } from '../../../modules/picard/collectmultiplemetrics/main'
include { PICARD_COLLECTWGSMETRICS } from '../../../modules/picard/collectwgsmetrics/main'
include { PICARD_COLLECTHSMETRICS } from '../../../modules/picard/collecthsmetrics/main'
//
// BAM_QC_PICARD: collect Picard QC metrics for each alignment file.
//
// Always runs CollectMultipleMetrics. For coverage metrics it runs
// CollectHsMetrics when bait/target intervals are supplied, and
// CollectWgsMetrics otherwise.
//
// NOTE(review): the mode switch below relies on Groovy truthiness, so it only
// distinguishes the two modes when "no intervals" is passed as a falsy value
// such as `[]` (as the tests accompanying this subworkflow do). A real channel
// object would presumably always evaluate as true - confirm before passing one.
//
workflow BAM_QC_PICARD {

    take:
    ch_bam             // channel: [ val(meta), [ bam ]]
    ch_fasta           // channel: [ fasta ]
    ch_fasta_fai       // channel: [ fasta_fai ]
    ch_bait_interval   // channel: [ bait_interval ]
    ch_target_interval // channel: [ target_interval ]

    main:
    ch_versions         = Channel.empty()
    ch_coverage_metrics = Channel.empty()

    // Metrics that are independent of the sequencing design
    PICARD_COLLECTMULTIPLEMETRICS( ch_bam, ch_fasta )
    ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first())

    if (ch_bait_interval || ch_target_interval) {
        // Targeted mode needs BOTH interval lists. Abort here instead of only
        // logging, otherwise CollectHsMetrics would still be launched with a
        // missing input and fail later with a far less obvious message.
        if (!ch_bait_interval)   error("Bait interval channel is empty")
        if (!ch_target_interval) error("Target interval channel is empty")

        PICARD_COLLECTHSMETRICS( ch_bam, ch_fasta, ch_fasta_fai, ch_bait_interval, ch_target_interval )
        ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics)
        ch_versions         = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first())
    } else {
        // No intervals supplied: whole-genome coverage metrics
        PICARD_COLLECTWGSMETRICS( ch_bam, ch_fasta )
        ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTWGSMETRICS.out.metrics)
        ch_versions         = ch_versions.mix(PICARD_COLLECTWGSMETRICS.out.versions.first())
    }

    emit:
    coverage_metrics = ch_coverage_metrics                        // channel: [ val(meta), [ coverage_metrics ] ]
    multiple_metrics = PICARD_COLLECTMULTIPLEMETRICS.out.metrics  // channel: [ val(meta), [ multiple_metrics ] ]
    versions         = ch_versions                                // channel: [ versions.yml ]
}

View file

@ -0,0 +1,60 @@
# Metadata for the bam_qc_picard subworkflow.
# `name` matches the subworkflow identifier (workflow BAM_QC_PICARD).
name: bam_qc_picard
description: Produces comprehensive statistics from BAM file
keywords:
  - statistics
  - counts
  - hs_metrics
  - wgs_metrics
  - bam
  - sam
  - cram
# Modules included by the subworkflow
modules:
  - picard/collectmultiplemetrics
  - picard/collectwgsmetrics
  - picard/collecthsmetrics
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - fasta:
      type: optional file
      description: Reference fasta file
      pattern: "*.{fasta,fa}"
  - fasta_fai:
      type: optional file
      description: Reference fasta file index
      pattern: "*.{fasta,fa}.fai"
  # Supplying interval lists switches coverage metrics from
  # CollectWgsMetrics to CollectHsMetrics; both lists are then required.
  - bait_intervals:
      type: optional file
      description: An interval list file that contains the locations of the baits used.
      pattern: "baits.interval_list"
  - target_intervals:
      type: optional file
      description: An interval list file that contains the locations of the targets.
      pattern: "targets.interval_list"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - coverage_metrics:
      type: file
      description: Alignment metrics files generated by picard CollectHsMetrics or CollectWgsMetrics
      pattern: "*_metrics.txt"
  - multiple_metrics:
      type: file
      description: Alignment metrics files generated by picard CollectMultipleMetrics
      pattern: "*_{metrics}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@matthdsm"

View file

@ -0,0 +1,27 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BAM_QC_PICARD } from '../../../../subworkflows/nf-core/bam_qc_picard/main' addParams([:])
// Whole-genome test: both interval arguments are passed as empty lists.
workflow test_bam_qc_picard_wgs {

    // Sample tuple: [ meta map, sorted BAM ]
    sample_meta = [ id:'test', single_end:false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    bam_input   = [ sample_meta, sorted_bam ]

    // Reference genome and its index
    ref_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ref_fai   = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)

    BAM_QC_PICARD ( bam_input, ref_fasta, ref_fai, [], [] )
}
// Targeted test: bait and target interval lists are both supplied.
workflow test_bam_qc_picard_targetted {

    // Sample tuple: [ meta map, sorted BAM ]
    sample_meta = [ id:'test', single_end:false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    bam_input   = [ sample_meta, sorted_bam ]

    // Reference genome and its index
    ref_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ref_fai   = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)

    // Interval lists for the bait and target regions
    bait_list   = file(params.test_data['sarscov2']['genome']['baits_interval_list'], checkIfExists: true)
    target_list = file(params.test_data['sarscov2']['genome']['targets_interval_list'], checkIfExists: true)

    BAM_QC_PICARD ( bam_input, ref_fasta, ref_fai, bait_list, target_list )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish each process's outputs under "<outdir>/<tool>", where <tool> is
    // the lower-cased text before the first '_' of the last ':'-separated
    // segment of the process name
    // (e.g. "A:B:PICARD_COLLECTWGSMETRICS" -> "picard").
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolDir    = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }
}

View file

@ -0,0 +1,33 @@
# Whole-genome run: expects CollectMultipleMetrics and CollectWgsMetrics outputs.
- name: bam qc picard wgs
  command: nextflow run ./tests/subworkflows/nf-core/bam_qc_picard -entry test_bam_qc_picard_wgs -c tests/config/nextflow.config
  tags:
    - subworkflows
    # Tags below are left disabled (commented out), as in the original file.
    # - subworkflows/bam_qc_picard
    # Modules
    # - picard
    # - picard/collectmultiplemetrics
    # - picard/collectwgsmetrics
  files:
    # CollectMultipleMetrics outputs
    - path: ./output/picard/test.CollectMultipleMetrics.alignment_summary_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.insert_size_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.base_distribution_by_cycle_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.quality_by_cycle_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.quality_distribution_metrics
    # Coverage metrics output
    - path: ./output/picard/test.CollectWgsMetrics.coverage_metrics
# Targeted run: expects CollectMultipleMetrics and CollectHsMetrics outputs.
- name: bam qc picard targetted
  command: nextflow run ./tests/subworkflows/nf-core/bam_qc_picard -entry test_bam_qc_picard_targetted -c tests/config/nextflow.config
  tags:
    - subworkflows
    # Tags below are left disabled (commented out), as in the original file.
    # - subworkflows/bam_qc_picard
    # Modules
    # - picard
    # - picard/collectmultiplemetrics
    # - picard/collecthsmetrics
  files:
    # CollectMultipleMetrics outputs
    - path: ./output/picard/test.CollectMultipleMetrics.alignment_summary_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.insert_size_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.base_distribution_by_cycle_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.quality_by_cycle_metrics
    - path: ./output/picard/test.CollectMultipleMetrics.quality_distribution_metrics
    # Coverage metrics output
    - path: ./output/picard/test.CollectHsMetrics.coverage_metrics