Merge branch 'master' into bump/picard

This commit is contained in:
Matthias De Smet 2022-06-02 22:09:42 +02:00 committed by GitHub
commit cf9d62fc83
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 275 additions and 15 deletions

View file

@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a
# Setup default ARG variables # Setup default ARG variables
ARG GENOME=GRCh38 ARG GENOME=GRCh38
ARG SPECIES=homo_sapiens ARG SPECIES=homo_sapiens
ARG VEP_VERSION=104 ARG VEP_VERSION=105
ARG VEP_TAG=104.3 ARG VEP_TAG=105.0
# Add conda installation dir to PATH (instead of doing 'conda activate') # Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH

View file

@ -20,9 +20,9 @@ build_push() {
docker push nfcore/vep:${VEP_TAG}.${GENOME} docker push nfcore/vep:${VEP_TAG}.${GENOME}
} }
build_push "GRCh37" "homo_sapiens" "104" "104.3" build_push "GRCh37" "homo_sapiens" "105" "105.0"
build_push "GRCh38" "homo_sapiens" "104" "104.3" build_push "GRCh38" "homo_sapiens" "105" "105.0"
build_push "GRCm38" "mus_musculus" "102" "104.3" build_push "GRCm38" "mus_musculus" "102" "105.0"
build_push "GRCm39" "mus_musculus" "104" "104.3" build_push "GRCm39" "mus_musculus" "105" "105.0"
build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0"
build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0"

View file

@ -1,10 +1,10 @@
# You can use this file to create a conda environment for this module: # You can use this file to create a conda environment for this module:
# conda env create -f environment.yml # conda env create -f environment.yml
name: nf-core-vep-104.3 name: nf-core-vep-105.0
channels: channels:
- conda-forge - conda-forge
- bioconda - bioconda
- defaults - defaults
dependencies: dependencies:
- bioconda::ensembl-vep=104.3 - bioconda::ensembl-vep=105.0

View file

@ -0,0 +1,50 @@
// Estimates the DRAGstr model parameters for one BAM/CRAM/SAM sample by running
// `gatk CalibrateDragstrModel`.
//
// Inputs:
//   meta, bam, bam_index, intervals - sample map, alignment file, its index and an
//                                     optional intervals file (pass [] to omit it)
//   fasta, fasta_fai, dict          - reference FASTA plus its index and dictionary
//   strtablefile                    - STR table passed to --str-table-path
// Outputs:
//   dragstr_model - tuple of meta and the "<prefix>.txt" model file
//   versions      - versions.yml recording the GATK version
process GATK4_CALIBRATEDRAGSTRMODEL {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(bam), path(bam_index), path(intervals)
    path  fasta
    path  fasta_fai
    path  dict
    path  strtablefile

    output:
    tuple val(meta), path("*.txt") , emit: dragstr_model
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The intervals flag is only emitted when an intervals file was supplied.
    def intervals_command = intervals ? "--intervals $intervals" : ""

    // Heap size is tracked in megabytes: `task.memory.giga` truncates to whole
    // gigabytes, so an allocation below 1 GB would render as "-Xmx0g" and a
    // fractional one (e.g. 3.5 GB) would silently lose its remainder.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK CalibrateDragstrModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.mega
    }
    """
    gatk --java-options "-Xmx${avail_mem}M" CalibrateDragstrModel \\
        --input $bam \\
        --output ${prefix}.txt \\
        --reference $fasta \\
        --str-table-path $strtablefile \\
        $intervals_command \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,74 @@
# Module metadata for GATK4_CALIBRATEDRAGSTRMODEL: describes the wrapped tool and
# every input/output channel element declared by the process.
name: gatk4_calibratedragstrmodel
description: estimates the parameters for the DRAGstr model
keywords:
  - gatk4
  - bam
  - cram
  - sam
  - calibratedragstrmodel
tools:
  - gatk4:
      description: >-
        Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360057441571-CalibrateDragstrModel-BETA-
      tool_dev_url: https://github.com/broadinstitute/gatk
      doi: 10.1158/1538-7445.AM2017-3590
      licence: ["Apache-2.0"]

input:
  # Only when we have meta
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - bam_index:
      type: file
      description: index of the BAM/CRAM/SAM file
      pattern: "*.{bai,crai,sai}"
  - intervals:
      type: file
      description: BED file or interval list containing regions (optional)
      pattern: "*.{bed,interval_list}"
  - fasta:
      type: file
      description: The reference FASTA file
      pattern: "*.{fasta,fa}"
  - fasta_fai:
      type: file
      description: The index of the reference FASTA file
      pattern: "*.fai"
  - dict:
      type: file
      description: The sequence dictionary of the reference FASTA file
      pattern: "*.dict"
  - strtablefile:
      type: file
      description: The StrTableFile zip folder of the reference FASTA file
      pattern: "*.zip"

output:
  # Only when we have meta
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - dragstr_model:
      type: file
      description: The DragSTR model
      pattern: "*.txt"

authors:
  - "@nvnieuwk"

View file

@ -8,7 +8,7 @@ process GATK4_HAPLOTYPECALLER {
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input: input:
tuple val(meta), path(input), path(input_index), path(intervals) tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model)
path fasta path fasta
path fai path fai
path dict path dict
@ -28,6 +28,7 @@ process GATK4_HAPLOTYPECALLER {
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
def interval_command = intervals ? "--intervals $intervals" : "" def interval_command = intervals ? "--intervals $intervals" : ""
def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : ""
def avail_mem = 3 def avail_mem = 3
if (!task.memory) { if (!task.memory) {
@ -42,6 +43,7 @@ process GATK4_HAPLOTYPECALLER {
--reference $fasta \\ --reference $fasta \\
$dbsnp_command \\ $dbsnp_command \\
$interval_command \\ $interval_command \\
$dragstr_command \\
--tmp-dir . \\ --tmp-dir . \\
$args $args

View file

@ -32,6 +32,10 @@ input:
- intervals: - intervals:
type: file type: file
description: Bed file with the genomic regions included in the library (optional) description: Bed file with the genomic regions included in the library (optional)
- dragstr_model:
type: file
description: Text file containing the DragSTR model of the used BAM/CRAM file (optional)
pattern: "*.txt"
- fasta: - fasta:
type: file type: file
description: The reference fasta file description: The reference fasta file

View file

@ -743,6 +743,10 @@ gatk4/calculatecontamination:
- modules/gatk4/calculatecontamination/** - modules/gatk4/calculatecontamination/**
- tests/modules/gatk4/calculatecontamination/** - tests/modules/gatk4/calculatecontamination/**
gatk4/calibratedragstrmodel:
- modules/gatk4/calibratedragstrmodel/**
- tests/modules/gatk4/calibratedragstrmodel/**
gatk4/cnnscorevariants: gatk4/cnnscorevariants:
- modules/gatk4/cnnscorevariants/** - modules/gatk4/cnnscorevariants/**
- tests/modules/gatk4/cnnscorevariants/** - tests/modules/gatk4/cnnscorevariants/**

View file

@ -123,6 +123,7 @@ params {
genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta" genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta"
genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta" genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai" genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
genome_strtablefile = "${test_data_dir}/genomics/homo_sapiens/genome/genome_strtablefile.zip"
genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict" genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3"
genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
@ -265,6 +266,8 @@ params {
test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table"
test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table"
test_paired_end_sorted_dragstrmodel = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt"
test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz"
test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz"

View file

@ -0,0 +1,66 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../../modules/gatk4/calibratedragstrmodel/main.nf'
// Runs CalibrateDragstrModel on a paired-end sorted BAM without an intervals file.
workflow test_gatk4_calibratedragstrmodel_bam {

    // Sub-maps of the shared test-data table used below
    illumina = params.test_data['homo_sapiens']['illumina']
    genome   = params.test_data['homo_sapiens']['genome']

    // Sample tuple: [ meta, alignment, index, intervals ]; [] means "no intervals"
    meta      = [ id:'test', single_end:false ]
    alignment = file(illumina['test_paired_end_sorted_bam'], checkIfExists: true)
    index     = file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    input     = [ meta, alignment, index, [] ]

    GATK4_CALIBRATEDRAGSTRMODEL (
        input,
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true),
        file(genome['genome_dict'], checkIfExists: true),
        file(genome['genome_strtablefile'], checkIfExists: true)
    )
}
// Runs CalibrateDragstrModel on a paired-end sorted CRAM without an intervals file.
workflow test_gatk4_calibratedragstrmodel_cram {

    // Sub-maps of the shared test-data table used below
    illumina = params.test_data['homo_sapiens']['illumina']
    genome   = params.test_data['homo_sapiens']['genome']

    // Sample tuple: [ meta, alignment, index, intervals ]; [] means "no intervals"
    meta      = [ id:'test', single_end:false ]
    alignment = file(illumina['test_paired_end_sorted_cram'], checkIfExists: true)
    index     = file(illumina['test_paired_end_sorted_cram_crai'], checkIfExists: true)
    input     = [ meta, alignment, index, [] ]

    GATK4_CALIBRATEDRAGSTRMODEL (
        input,
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true),
        file(genome['genome_dict'], checkIfExists: true),
        file(genome['genome_strtablefile'], checkIfExists: true)
    )
}
// Runs CalibrateDragstrModel on a paired-end sorted CRAM restricted by a BED file.
workflow test_gatk4_calibratedragstrmodel_beds {

    // Sub-maps of the shared test-data table used below
    illumina = params.test_data['homo_sapiens']['illumina']
    genome   = params.test_data['homo_sapiens']['genome']

    // Sample tuple: [ meta, alignment, index, intervals ]
    meta      = [ id:'test', single_end:false ]
    alignment = file(illumina['test_paired_end_sorted_cram'], checkIfExists: true)
    index     = file(illumina['test_paired_end_sorted_cram_crai'], checkIfExists: true)
    regions   = file(genome['genome_bed'], checkIfExists: true)
    input     = [ meta, alignment, index, regions ]

    GATK4_CALIBRATEDRAGSTRMODEL (
        input,
        file(genome['genome_fasta'], checkIfExists: true),
        file(genome['genome_fasta_fai'], checkIfExists: true),
        file(genome['genome_dict'], checkIfExists: true),
        file(genome['genome_strtablefile'], checkIfExists: true)
    )
}

View file

@ -0,0 +1,5 @@
// Test-run configuration applied to every process.
process {
    // Publish under "<params.outdir>/<tool>", where <tool> is derived from the
    // process name: keep the last ':'-separated component, take the part before
    // its first '_', and lower-case it (e.g. GATK4_CALIBRATEDRAGSTRMODEL -> gatk4).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,26 @@
# Test cases for the gatk4/calibratedragstrmodel module: one entry per test
# workflow, each checking the md5sum of the produced model file.
- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam
  command: >-
    nextflow run ./tests/modules/gatk4/calibratedragstrmodel
    -entry test_gatk4_calibratedragstrmodel_bam
    -c ./tests/config/nextflow.config
    -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
    - gatk4/calibratedragstrmodel
    - gatk4
  files:
    - path: output/gatk4/test.txt
      md5sum: "0a1a1583b157fa2251dd931ed165da4f"

- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram
  command: >-
    nextflow run ./tests/modules/gatk4/calibratedragstrmodel
    -entry test_gatk4_calibratedragstrmodel_cram
    -c ./tests/config/nextflow.config
    -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
    - gatk4/calibratedragstrmodel
    - gatk4
  files:
    - path: output/gatk4/test.txt
      md5sum: "1aa7ab38023f724877b3323c5e6b9a4e"

- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds
  command: >-
    nextflow run ./tests/modules/gatk4/calibratedragstrmodel
    -entry test_gatk4_calibratedragstrmodel_beds
    -c ./tests/config/nextflow.config
    -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
    - gatk4/calibratedragstrmodel
    - gatk4
  files:
    - path: output/gatk4/test.txt
      md5sum: "def8baccad7bd59006f08fcb0a6721bf"

View file

@ -8,6 +8,7 @@ workflow test_gatk4_haplotypecaller {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
[],
[] []
] ]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
@ -21,6 +22,7 @@ workflow test_gatk4_haplotypecaller_cram {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
[],
[] []
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
@ -34,7 +36,8 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true),
[]
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
@ -45,3 +48,20 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp {
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi )
} }
// Runs GATK4_HAPLOTYPECALLER on a CRAM with a DRAGstr model file supplied and
// neither intervals nor known-sites inputs.
workflow test_gatk4_haplotypecaller_dragstr_model {
    // Tuple layout: [ meta, input, input_index, intervals, dragstr_model ]
    input = [ [ id:'test' ], // meta map
              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
              [], // no intervals file
              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_dragstrmodel'], checkIfExists: true)
            ]
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
    // Empty lists are passed for the sites file and its index
    sites = []
    sites_tbi = []
    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi )
}

View file

@ -6,7 +6,6 @@
files: files:
- path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config
@ -16,7 +15,6 @@
files: files:
- path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config
@ -26,4 +24,12 @@
files: files:
- path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model
command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config
tags:
- gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi