mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Add panel of normals subworkflow (#1044)
* commiting changes to switch branch * commit to setup remote branch * first draft of the sompon workflow * keep branch in line with gendb bugfixing * Update test.yml * tidy up main.nf * fixed md5sum Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
This commit is contained in:
parent
e0ada7d219
commit
9573cb1bec
6 changed files with 205 additions and 0 deletions
58
subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
58
subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
//
|
||||||
|
// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals
|
||||||
|
//
|
||||||
|
|
||||||
|
params.mutect2_options = [args: '--max-mnp-distance 0']
|
||||||
|
params.gendbimport_options = [:]
|
||||||
|
params.createsompon_options = [:]
|
||||||
|
|
||||||
|
include { GATK4_MUTECT2 } from '../../../modules/gatk4/mutect2/main' addParams( options: params.mutect2_options )
|
||||||
|
include { GATK4_GENOMICSDBIMPORT } from '../../../modules/gatk4/genomicsdbimport/main' addParams( options: params.gendbimport_options )
|
||||||
|
include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/gatk4/createsomaticpanelofnormals/main' addParams( options: params.createsompon_options )
|
||||||
|
|
||||||
|
workflow GATK_CREATE_SOM_PON {
|
||||||
|
take:
|
||||||
|
ch_mutect2_in // channel: [ val(meta), [ input ], [ input_index ], [] ]
|
||||||
|
fasta // channel: /path/to/reference/fasta
|
||||||
|
fastaidx // channel: /path/to/reference/fasta/index
|
||||||
|
dict // channel: /path/to/reference/fasta/dictionary
|
||||||
|
pon_name // channel: name for panel of normals
|
||||||
|
interval_file // channel: /path/to/interval/file
|
||||||
|
|
||||||
|
main:
|
||||||
|
ch_versions = Channel.empty()
|
||||||
|
input = channel.from(ch_mutect2_in)
|
||||||
|
//
|
||||||
|
//Perform variant calling for each sample using mutect2 module in panel of normals mode.
|
||||||
|
//
|
||||||
|
GATK4_MUTECT2 ( input , false , true, false , [] , fasta , fastaidx , dict , [], [] , [] , [] )
|
||||||
|
ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first())
|
||||||
|
|
||||||
|
//
|
||||||
|
//Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport.
|
||||||
|
//
|
||||||
|
ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList()
|
||||||
|
ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList()
|
||||||
|
gendb_input = Channel.of([[ id:pon_name ]]).combine(ch_vcf).combine(ch_index).combine([interval_file]).combine(['']).combine([dict])
|
||||||
|
GATK4_GENOMICSDBIMPORT ( gendb_input, false, false, false )
|
||||||
|
ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first())
|
||||||
|
|
||||||
|
//
|
||||||
|
//Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals.
|
||||||
|
//
|
||||||
|
GATK4_GENOMICSDBIMPORT.out.genomicsdb.view()
|
||||||
|
GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fastaidx, dict )
|
||||||
|
ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first())
|
||||||
|
|
||||||
|
emit:
|
||||||
|
mutect2_vcf = GATK4_MUTECT2.out.vcf.collect() // channel: [ val(meta), [ vcf ] ]
|
||||||
|
mutect2_index = GATK4_MUTECT2.out.tbi.collect() // channel: [ val(meta), [ tbi ] ]
|
||||||
|
mutect2_stats = GATK4_MUTECT2.out.stats.collect() // channel: [ val(meta), [ stats ] ]
|
||||||
|
|
||||||
|
genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), [ genomicsdb ] ]
|
||||||
|
|
||||||
|
pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), [ vcf.gz ] ]
|
||||||
|
pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), [ tbi ] ]
|
||||||
|
|
||||||
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
|
}
|
75
subworkflows/nf-core/gatk_create_som_pon/meta.yml
Normal file
75
subworkflows/nf-core/gatk_create_som_pon/meta.yml
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
name: gatk_create_som_pon
|
||||||
|
description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals.
|
||||||
|
keywords:
|
||||||
|
- gatk4
|
||||||
|
- mutect2
|
||||||
|
- genomicsdbimport
|
||||||
|
- createsomaticpanelofnormals
|
||||||
|
- variant_calling
|
||||||
|
- genomicsdb_workspace
|
||||||
|
- panel_of_normals
|
||||||
|
modules:
|
||||||
|
- gatk4/mutect2
|
||||||
|
- gatk4/genomicsdbimport
|
||||||
|
- gatk4/createsomaticpanelofnormals
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test' ]
|
||||||
|
- input:
|
||||||
|
type: list
|
||||||
|
description: list of BAM files, also able to take CRAM as an input
|
||||||
|
pattern: "[ *.{bam,cram} ]"
|
||||||
|
- input_index:
|
||||||
|
type: list
|
||||||
|
description: list of BAM file indexes, also able to take CRAM indexes as an input
|
||||||
|
pattern: "[ *.{bam.bai,cram.crai} ]"
|
||||||
|
- fasta:
|
||||||
|
type: file
|
||||||
|
description: The reference fasta file
|
||||||
|
pattern: "*.fasta"
|
||||||
|
- fastaidx:
|
||||||
|
type: file
|
||||||
|
description: Index of reference fasta file
|
||||||
|
pattern: "*.fasta.fai"
|
||||||
|
- dict:
|
||||||
|
type: file
|
||||||
|
description: GATK sequence dictionary
|
||||||
|
pattern: "*.dict"
|
||||||
|
- pon_name:
|
||||||
|
type: String
|
||||||
|
description: name to be used for the genomicsdb workspace and panel of normals, as meta_id has the individual sample names and a name for the combined files is required here.
|
||||||
|
pattern: "example_name"
|
||||||
|
output:
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: 'versions.yml'
|
||||||
|
- mutect2_vcf:
|
||||||
|
type: list
|
||||||
|
description: List of compressed vcf files to be used to make the gendb workspace
|
||||||
|
pattern: "[ *.vcf.gz ]"
|
||||||
|
- mutect2_index:
|
||||||
|
type: list
|
||||||
|
description: List of indexes of mutect2_vcf files
|
||||||
|
pattern: "[ *vcf.gz.tbi ]"
|
||||||
|
- mutect2_stats:
|
||||||
|
type: list
|
||||||
|
description: List of stats files that pair with mutect2_vcf files
|
||||||
|
pattern: "[ *vcf.gz.stats ]"
|
||||||
|
- genomicsdb:
|
||||||
|
type: directory
|
||||||
|
description: Directory containing the files that compose the genomicsdb workspace.
|
||||||
|
pattern: "path/name_of_workspace"
|
||||||
|
- pon_vcf:
|
||||||
|
type: file
|
||||||
|
description: Panel of normal as compressed vcf file
|
||||||
|
pattern: "*.vcf.gz"
|
||||||
|
- pon_index:
|
||||||
|
type: file
|
||||||
|
description: Index of pon_vcf file
|
||||||
|
pattern: "*vcf.gz.tbi"
|
||||||
|
authors:
|
||||||
|
- '@GCJMackenzie'
|
3
subworkflows/nf-core/gatk_create_som_pon/nextflow.config
Normal file
3
subworkflows/nf-core/gatk_create_som_pon/nextflow.config
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
params.mutect2_options = [:]
|
||||||
|
params.gendbimport_options = [:]
|
||||||
|
params.createsompon_options = [:]
|
|
@ -14,3 +14,8 @@ subworkflows/sra_fastq:
|
||||||
- subworkflows/nf-core/sra_fastq/**
|
- subworkflows/nf-core/sra_fastq/**
|
||||||
- tests/subworkflows/nf-core/sra_fastq/**
|
- tests/subworkflows/nf-core/sra_fastq/**
|
||||||
|
|
||||||
|
subworkflows/gatk_create_som_pon:
|
||||||
|
- subworkflows/nf-core/gatk_create_som_pon/**
|
||||||
|
- tests/subworkflows/nf-core/gatk_create_som_pon/**
|
||||||
|
|
||||||
|
|
26
tests/subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
26
tests/subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { GATK_CREATE_SOM_PON } from '../../../../subworkflows/nf-core/gatk_create_som_pon/main' addParams( [:] )
|
||||||
|
|
||||||
|
workflow test_gatk_create_som_pon {
|
||||||
|
ch_mutect2_in = [
|
||||||
|
[[ id:'test1' ], // meta map
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
|
||||||
|
[] ],
|
||||||
|
[[ id:'test2' ], // meta map
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
|
||||||
|
[file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
|
||||||
|
[] ]
|
||||||
|
]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||||
|
pon_name = "test_panel"
|
||||||
|
interval_file = file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true)
|
||||||
|
|
||||||
|
GATK_CREATE_SOM_PON ( ch_mutect2_in, fasta, fastaidx, dict, pon_name, interval_file )
|
||||||
|
|
||||||
|
}
|
38
tests/subworkflows/nf-core/gatk_create_som_pon/test.yml
Normal file
38
tests/subworkflows/nf-core/gatk_create_som_pon/test.yml
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
- name: gatk_create_som_pon
|
||||||
|
command: nextflow run ./tests/subworkflows/nf-core/gatk_create_som_pon -entry test_gatk_create_som_pon -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- subworkflows/gatk_create_som_pon
|
||||||
|
# Modules
|
||||||
|
- gatk4
|
||||||
|
- gatk4/mutect2
|
||||||
|
- gatk4/genomicsdbimport
|
||||||
|
- gatk4/createsomaticpanelofnormals
|
||||||
|
files:
|
||||||
|
# gatk4 mutect2
|
||||||
|
- path: output/gatk4/test1.vcf.gz
|
||||||
|
- path: output/gatk4/test1.vcf.gz.stats
|
||||||
|
md5sum: 4f77301a125913170b8e9e7828b4ca3f
|
||||||
|
- path: output/gatk4/test1.vcf.gz.tbi
|
||||||
|
- path: output/gatk4/test2.vcf.gz
|
||||||
|
- path: output/gatk4/test2.vcf.gz.stats
|
||||||
|
md5sum: 106c5828b02b906c97922618b6072169
|
||||||
|
- path: output/gatk4/test2.vcf.gz.tbi
|
||||||
|
# gatk4 genomicsdbimport
|
||||||
|
- path: output/gatk4/test_panel/__tiledb_workspace.tdb
|
||||||
|
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||||
|
- path: output/gatk4/test_panel/callset.json
|
||||||
|
md5sum: 2ab411773b7267de61f8c04939de2a99
|
||||||
|
- path: output/gatk4/test_panel/chr22$1$40001/.__consolidation_lock
|
||||||
|
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||||
|
- path: output/gatk4/test_panel/chr22$1$40001/__array_schema.tdb
|
||||||
|
- path: output/gatk4/test_panel/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
|
||||||
|
md5sum: 2502f79658bc000578ebcfddfc1194c0
|
||||||
|
- path: output/gatk4/test_panel/vcfheader.vcf
|
||||||
|
contains:
|
||||||
|
- "FORMAT=<ID=AD,Number=R,Type=Integer,Description="
|
||||||
|
- path: output/gatk4/test_panel/vidmap.json
|
||||||
|
md5sum: ee4f6815c433caa8ab101ec45ff328a6
|
||||||
|
# gatk4 createsomaticpanelofnormals
|
||||||
|
- path: output/gatk4/test_panel.vcf.gz
|
||||||
|
- path: output/gatk4/test_panel.vcf.gz.tbi
|
||||||
|
md5sum: d7e2524ba4bf7538dbee3e225a74b0da
|
Loading…
Reference in a new issue