mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Add panel of normals subworkflow (#1044)
* commiting changes to switch branch * commit to setup remote branch * first draft of the sompon workflow * keep branch in line with gendb bugfixing * Update test.yml * tidy up main.nf * fixed md5sum Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
This commit is contained in:
parent
e0ada7d219
commit
9573cb1bec
6 changed files with 205 additions and 0 deletions
58
subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
58
subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
|
@ -0,0 +1,58 @@
|
|||
//
|
||||
// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals
|
||||
//
|
||||
|
||||
params.mutect2_options = [args: '--max-mnp-distance 0']
|
||||
params.gendbimport_options = [:]
|
||||
params.createsompon_options = [:]
|
||||
|
||||
include { GATK4_MUTECT2 } from '../../../modules/gatk4/mutect2/main' addParams( options: params.mutect2_options )
|
||||
include { GATK4_GENOMICSDBIMPORT } from '../../../modules/gatk4/genomicsdbimport/main' addParams( options: params.gendbimport_options )
|
||||
include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/gatk4/createsomaticpanelofnormals/main' addParams( options: params.createsompon_options )
|
||||
|
||||
//
// Build a somatic panel of normals in three steps:
//   1. call variants on every normal sample with GATK4_MUTECT2,
//   2. gather the per-sample VCFs into one genomicsdb workspace with GATK4_GENOMICSDBIMPORT,
//   3. create the panel from that workspace with GATK4_CREATESOMATICPANELOFNORMALS.
//
workflow GATK_CREATE_SOM_PON {
    take:
    ch_mutect2_in       // list: one [ val(meta), [ input ], [ input_index ], [] ] tuple per normal sample
    fasta               // channel: /path/to/reference/fasta
    fastaidx            // channel: /path/to/reference/fasta/index
    dict                // channel: /path/to/reference/fasta/dictionary
    pon_name            // channel: name for panel of normals
    interval_file       // channel: /path/to/interval/file

    main:
    ch_versions = Channel.empty()

    // Emit each sample tuple of the input list as its own channel item.
    // Channel.fromList replaces the deprecated channel.from factory.
    input = Channel.fromList(ch_mutect2_in)

    //
    // Perform variant calling for each sample using mutect2 module in panel of normals mode.
    //
    GATK4_MUTECT2 ( input , false , true, false , [] , fasta , fastaidx , dict , [], [] , [] , [] )
    ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first())

    //
    // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport.
    //
    // collect{ it[1] } gathers the second element of every output tuple into one list;
    // toList() wraps that list once more so that combine() appends it as a single element.
    ch_vcf   = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList()
    ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList()

    // Resulting tuple: [ [ id:pon_name ], [ vcfs ], [ tbis ], interval_file, '', dict ]
    // NOTE(review): the empty string presumably fills an unused slot of the module input - confirm against the module.
    gendb_input = Channel.of([[ id:pon_name ]]).combine(ch_vcf).combine(ch_index).combine([interval_file]).combine(['']).combine([dict])
    GATK4_GENOMICSDBIMPORT ( gendb_input, false, false, false )
    ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first())

    //
    // Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals.
    //
    GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, fasta, fastaidx, dict )
    ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first())

    emit:
    // NOTE(review): collect() gathers all emissions into one list item, so these three
    // outputs may not have the per-sample tuple shape the comments describe - confirm with consumers.
    mutect2_vcf   = GATK4_MUTECT2.out.vcf.collect()                 // channel: [ val(meta), [ vcf ] ]
    mutect2_index = GATK4_MUTECT2.out.tbi.collect()                 // channel: [ val(meta), [ tbi ] ]
    mutect2_stats = GATK4_MUTECT2.out.stats.collect()               // channel: [ val(meta), [ stats ] ]

    genomicsdb    = GATK4_GENOMICSDBIMPORT.out.genomicsdb           // channel: [ val(meta), [ genomicsdb ] ]

    pon_vcf       = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf       // channel: [ val(meta), [ vcf.gz ] ]
    pon_index     = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi       // channel: [ val(meta), [ tbi ] ]

    versions      = ch_versions                                     // channel: [ versions.yml ]
}
|
75
subworkflows/nf-core/gatk_create_som_pon/meta.yml
Normal file
75
subworkflows/nf-core/gatk_create_som_pon/meta.yml
Normal file
|
@ -0,0 +1,75 @@
|
|||
name: gatk_create_som_pon
|
||||
description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals.
|
||||
keywords:
|
||||
- gatk4
|
||||
- mutect2
|
||||
- genomicsdbimport
|
||||
- createsomaticpanelofnormals
|
||||
- variant_calling
|
||||
- genomicsdb_workspace
|
||||
- panel_of_normals
|
||||
modules:
|
||||
- gatk4/mutect2
|
||||
- gatk4/genomicsdbimport
|
||||
- gatk4/createsomaticpanelofnormals
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test' ]
|
||||
- input:
|
||||
type: list
|
||||
description: list of BAM files, also able to take CRAM as an input
|
||||
pattern: "[ *.{bam,cram} ]"
|
||||
- input_index:
|
||||
type: list
|
||||
description: list of BAM file indexes, also able to take CRAM indexes as an input
|
||||
pattern: "[ *.{bam.bai,cram.crai} ]"
|
||||
- fasta:
|
||||
type: file
|
||||
description: The reference fasta file
|
||||
pattern: "*.fasta"
|
||||
- fastaidx:
|
||||
type: file
|
||||
description: Index of reference fasta file
|
||||
pattern: "*.fasta.fai"
|
||||
- dict:
|
||||
type: file
|
||||
description: GATK sequence dictionary
|
||||
pattern: "*.dict"
|
||||
- pon_name:
|
||||
type: string
|
||||
description: Name to use for the genomicsdb workspace and the panel of normals. The meta id values hold the individual sample names, so a separate name for the combined files is required here.
|
||||
pattern: "example_name"
|
||||
output:
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: 'versions.yml'
|
||||
- mutect2_vcf:
|
||||
type: list
|
||||
description: List of compressed vcf files to be used to make the gendb workspace
|
||||
pattern: "[ *.vcf.gz ]"
|
||||
- mutect2_index:
|
||||
type: list
|
||||
description: List of indexes of mutect2_vcf files
|
||||
pattern: "[ *vcf.gz.tbi ]"
|
||||
- mutect2_stats:
|
||||
type: list
|
||||
description: List of stats files that pair with mutect2_vcf files
|
||||
pattern: "[ *vcf.gz.stats ]"
|
||||
- genomicsdb:
|
||||
type: directory
|
||||
description: Directory containing the files that compose the genomicsdb workspace.
|
||||
pattern: "path/name_of_workspace"
|
||||
- pon_vcf:
|
||||
type: file
|
||||
description: Panel of normals as a compressed vcf file
|
||||
pattern: "*.vcf.gz"
|
||||
- pon_index:
|
||||
type: file
|
||||
description: Index of pon_vcf file
|
||||
pattern: "*vcf.gz.tbi"
|
||||
authors:
|
||||
- '@GCJMackenzie'
|
3
subworkflows/nf-core/gatk_create_som_pon/nextflow.config
Normal file
3
subworkflows/nf-core/gatk_create_som_pon/nextflow.config
Normal file
|
@ -0,0 +1,3 @@
|
|||
params.mutect2_options = [:]
|
||||
params.gendbimport_options = [:]
|
||||
params.createsompon_options = [:]
|
|
@ -14,3 +14,8 @@ subworkflows/sra_fastq:
|
|||
- subworkflows/nf-core/sra_fastq/**
|
||||
- tests/subworkflows/nf-core/sra_fastq/**
|
||||
|
||||
subworkflows/gatk_create_som_pon:
|
||||
- subworkflows/nf-core/gatk_create_som_pon/**
|
||||
- tests/subworkflows/nf-core/gatk_create_som_pon/**
|
||||
|
||||
|
26
tests/subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
26
tests/subworkflows/nf-core/gatk_create_som_pon/main.nf
Normal file
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GATK_CREATE_SOM_PON } from '../../../../subworkflows/nf-core/gatk_create_som_pon/main' addParams( [:] )
|
||||
|
||||
workflow test_gatk_create_som_pon {
    // Shorthand handles into the shared test-data map.
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // Two normal samples, each as [ meta, [ bam ], [ bai ], [] ].
    def normal_samples = [
        [
            [ id:'test1' ],
            [ file(illumina['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true) ],
            [ file(illumina['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) ],
            []
        ],
        [
            [ id:'test2' ],
            [ file(illumina['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) ],
            [ file(illumina['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) ],
            []
        ]
    ]

    // Argument order: samples, fasta, fasta index, dictionary, panel name, interval file.
    GATK_CREATE_SOM_PON (
        normal_samples,
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true),
        file(genome['genome_dict'], checkIfExists: true),
        "test_panel",
        file(genome['genome_interval_list'], checkIfExists: true)
    )
}
|
38
tests/subworkflows/nf-core/gatk_create_som_pon/test.yml
Normal file
38
tests/subworkflows/nf-core/gatk_create_som_pon/test.yml
Normal file
|
@ -0,0 +1,38 @@
|
|||
- name: gatk_create_som_pon
|
||||
command: nextflow run ./tests/subworkflows/nf-core/gatk_create_som_pon -entry test_gatk_create_som_pon -c tests/config/nextflow.config
|
||||
tags:
|
||||
- subworkflows/gatk_create_som_pon
|
||||
# Modules
|
||||
- gatk4
|
||||
- gatk4/mutect2
|
||||
- gatk4/genomicsdbimport
|
||||
- gatk4/createsomaticpanelofnormals
|
||||
files:
|
||||
# gatk4 mutect2
|
||||
- path: output/gatk4/test1.vcf.gz
|
||||
- path: output/gatk4/test1.vcf.gz.stats
|
||||
md5sum: 4f77301a125913170b8e9e7828b4ca3f
|
||||
- path: output/gatk4/test1.vcf.gz.tbi
|
||||
- path: output/gatk4/test2.vcf.gz
|
||||
- path: output/gatk4/test2.vcf.gz.stats
|
||||
md5sum: 106c5828b02b906c97922618b6072169
|
||||
- path: output/gatk4/test2.vcf.gz.tbi
|
||||
# gatk4 genomicsdbimport
|
||||
- path: output/gatk4/test_panel/__tiledb_workspace.tdb
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||
- path: output/gatk4/test_panel/callset.json
|
||||
md5sum: 2ab411773b7267de61f8c04939de2a99
|
||||
- path: output/gatk4/test_panel/chr22$1$40001/.__consolidation_lock
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||
- path: output/gatk4/test_panel/chr22$1$40001/__array_schema.tdb
|
||||
- path: output/gatk4/test_panel/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
|
||||
md5sum: 2502f79658bc000578ebcfddfc1194c0
|
||||
- path: output/gatk4/test_panel/vcfheader.vcf
|
||||
contains:
|
||||
- "FORMAT=<ID=AD,Number=R,Type=Integer,Description="
|
||||
- path: output/gatk4/test_panel/vidmap.json
|
||||
md5sum: ee4f6815c433caa8ab101ec45ff328a6
|
||||
# gatk4 createsomaticpanelofnormals
|
||||
- path: output/gatk4/test_panel.vcf.gz
|
||||
- path: output/gatk4/test_panel.vcf.gz.tbi
|
||||
md5sum: d7e2524ba4bf7538dbee3e225a74b0da
|
Loading…
Reference in a new issue