bug fixes: genomicsdbimport (#1035)

* saving changes to checkout

* saving to sort out other branch

* removed yml tracking of files that can't be tracked due to the directory name changing between runs

* test data added, ready for pr

* fix eol linting error

* Update modules/gatk4/genomicsdbimport/main.nf

Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com>

* merging with master

* update push to show progress

* tests now working; untar is able to pass data to genomicsdbimport

* commit to checkout

* tests updated, module reworked to simplify and emit updated gendb

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* update meta.yml 

Priority of input options changed, updated to reflect this

* Update test.yml

name prefix changed in main script; test.yml updated to reflect this

* fix tests due to review changes

* bug fixes, multicalling samples and gendb emissions now fixed

* Update pytest_modules.yml

* Update meta.yml

Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
This commit is contained in:
GCJMackenzie 2021-11-06 15:51:15 +00:00 committed by GitHub
parent 729d9ae450
commit 316aedaaa6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 18 deletions

View file

@ -25,18 +25,18 @@ process GATK4_GENOMICSDBIMPORT {
val input_map val input_map
output: output:
tuple val(meta), path("*_genomicsdb") , optional:true, emit: genomicsdb tuple val(meta), path("${prefix}") , optional:true, emit: genomicsdb
tuple val(meta), path("$updated_db") , optional:true, emit: updatedb tuple val(meta), path("$updated_db") , optional:true, emit: updatedb
tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
// settings for running default create gendb mode // settings for running default create gendb mode
def inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V')}" inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}"
def dir_command = "--genomicsdb-workspace-path ${prefix}" dir_command = "--genomicsdb-workspace-path ${prefix}"
def intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} " intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} "
// settings changed for running get intervals list mode if run_intlist is true // settings changed for running get intervals list mode if run_intlist is true
if (run_intlist) { if (run_intlist) {

View file

@ -66,7 +66,11 @@ output:
- genomicsdb: - genomicsdb:
type: directory type: directory
description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db
pattern: "*_genomicsdb" pattern: "*/$prefix"
- updatedb:
type: directory
description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace.
pattern: "same/path/as/wspace"
- intervallist: - intervallist:
type: file type: file
description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode.

View file

@ -7,7 +7,7 @@ include { GATK4_GENOMICSDBIMPORT } from '../../../../modules/gatk4/genomicsdbimp
workflow test_gatk4_genomicsdbimport_create_genomicsdb { workflow test_gatk4_genomicsdbimport_create_genomicsdb {
input = [ [ id:'test_genomicsdb'], // meta map input = [ [ id:'test'], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) ,
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) , file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ,
file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) , file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) ,
@ -26,7 +26,7 @@ workflow test_gatk4_genomicsdbimport_get_intervalslist {
UNTAR ( db ) UNTAR ( db )
def input = Channel.of([ [ id:'test_genomicsdb'], // meta map def input = Channel.of([ [ id:'test'], // meta map
[] , [] ,
[] , [] ,
[] , [] ,
@ -45,7 +45,7 @@ workflow test_gatk4_genomicsdbimport_update_genomicsdb {
UNTAR ( db ) UNTAR ( db )
def input = Channel.of([ [ id:'test_genomicsdb'], // meta map def input = Channel.of([ [ id:'test'], // meta map
file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'] , checkIfExists: true) , file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'] , checkIfExists: true) ,
file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) , file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) ,
[] , [] ,

View file

@ -4,19 +4,19 @@
- gatk4/genomicsdbimport - gatk4/genomicsdbimport
- gatk4 - gatk4
files: files:
- path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb - path: output/gatk4/test/__tiledb_workspace.tdb
md5sum: d41d8cd98f00b204e9800998ecf8427e md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test_genomicsdb/callset.json - path: output/gatk4/test/callset.json
md5sum: a7d07d1c86449bbb1091ff29368da07a md5sum: a7d07d1c86449bbb1091ff29368da07a
- path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock - path: output/gatk4/test/chr22$1$40001/.__consolidation_lock
md5sum: d41d8cd98f00b204e9800998ecf8427e md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb - path: output/gatk4/test/chr22$1$40001/__array_schema.tdb
- path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json - path: output/gatk4/test/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
md5sum: 2502f79658bc000578ebcfddfc1194c0 md5sum: 2502f79658bc000578ebcfddfc1194c0
- path: output/gatk4/test_genomicsdb/vcfheader.vcf - path: output/gatk4/test/vcfheader.vcf
contains: contains:
- "FORMAT=<ID=AD,Number=R,Type=Integer,Description=" - "FORMAT=<ID=AD,Number=R,Type=Integer,Description="
- path: output/gatk4/test_genomicsdb/vidmap.json - path: output/gatk4/test/vidmap.json
md5sum: 18d3f68bd2cb6f4474990507ff95017a md5sum: 18d3f68bd2cb6f4474990507ff95017a
- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_get_intervalslist - name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_get_intervalslist
@ -25,7 +25,7 @@
- gatk4/genomicsdbimport - gatk4/genomicsdbimport
- gatk4 - gatk4
files: files:
- path: output/gatk4/test_genomicsdb.interval_list - path: output/gatk4/test.interval_list
md5sum: 4c85812ac15fc1cd29711a851d23c0bf md5sum: 4c85812ac15fc1cd29711a851d23c0bf
- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_update_genomicsdb - name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_update_genomicsdb