mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Merge branch 'master' into new-module-gatk4/reblockgvcf
This commit is contained in:
commit
bf73b6d376
9 changed files with 119 additions and 30 deletions
|
@ -13,6 +13,7 @@ process ENSEMBLVEP {
|
|||
val species
|
||||
val cache_version
|
||||
path cache
|
||||
path fasta
|
||||
path extra_files
|
||||
|
||||
output:
|
||||
|
@ -27,6 +28,8 @@ process ENSEMBLVEP {
|
|||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
|
||||
def reference = fasta ? "--fasta $fasta" : ""
|
||||
|
||||
"""
|
||||
mkdir $prefix
|
||||
|
||||
|
@ -34,6 +37,7 @@ process ENSEMBLVEP {
|
|||
-i $vcf \\
|
||||
-o ${prefix}.ann.vcf \\
|
||||
$args \\
|
||||
$reference \\
|
||||
--assembly $genome \\
|
||||
--species $species \\
|
||||
--cache \\
|
||||
|
|
|
@ -36,6 +36,11 @@ input:
|
|||
type: file
|
||||
description: |
|
||||
path to VEP cache (optional)
|
||||
- fasta:
|
||||
type: file
|
||||
description: |
|
||||
reference FASTA file (optional)
|
||||
pattern: "*.{fasta,fa}"
|
||||
- extra_files:
|
||||
type: tuple
|
||||
description: |
|
||||
|
|
|
@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
tag "$meta.id"
|
||||
label 'process_high'
|
||||
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
|
||||
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
|
||||
'broadinstitute/gatk:4.2.3.0' }"
|
||||
|
@ -15,6 +15,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
|
||||
output:
|
||||
tuple val(meta), path("${prefix}"), emit: output
|
||||
tuple val(meta), path("*.metrics"), emit: metrics, optional: true
|
||||
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
|
@ -25,6 +26,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def input_list = bam.collect{"--input $it"}.join(' ')
|
||||
|
||||
|
||||
def avail_mem = 3
|
||||
if (!task.memory) {
|
||||
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
|
||||
|
@ -32,8 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
avail_mem = task.memory.giga
|
||||
}
|
||||
"""
|
||||
export SPARK_USER=spark3
|
||||
|
||||
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
|
||||
$input_list \\
|
||||
--output $prefix \\
|
||||
|
@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK {
|
|||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
|
||||
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
||||
|
|
|
@ -58,3 +58,4 @@ authors:
|
|||
- "@ajodeh-juma"
|
||||
- "@FriederikeHanssen"
|
||||
- "@maxulysse"
|
||||
- "@SusiJo"
|
||||
|
|
|
@ -4,11 +4,22 @@ nextflow.enable.dsl = 2
|
|||
|
||||
include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf'
|
||||
|
||||
workflow test_ensemblvep {
|
||||
workflow test_ensemblvep_fasta {
|
||||
input = [
|
||||
[ id:'test' ], // meta map
|
||||
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
|
||||
]
|
||||
|
||||
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] )
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
|
||||
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] )
|
||||
}
|
||||
|
||||
workflow test_ensemblvep_no_fasta {
|
||||
input = [
|
||||
[ id:'test' ], // meta map
|
||||
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
|
||||
]
|
||||
|
||||
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] )
|
||||
}
|
||||
|
|
|
@ -1,5 +1,13 @@
|
|||
- name: ensemblvep test_ensemblvep
|
||||
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
|
||||
- name: ensemblvep test_ensemblvep_fasta
|
||||
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
|
||||
tags:
|
||||
- ensemblvep
|
||||
files:
|
||||
- path: output/ensemblvep/test.ann.vcf
|
||||
- path: output/ensemblvep/test.summary.html
|
||||
|
||||
- name: ensemblvep test_ensemblvep_no_fasta
|
||||
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
|
||||
tags:
|
||||
- ensemblvep
|
||||
files:
|
||||
|
|
|
@ -3,26 +3,55 @@
|
|||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
|
||||
|
||||
workflow test_gatk4_markduplicates_spark {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||
]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
|
||||
}
|
||||
|
||||
// chr 22
|
||||
workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
|
||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
|
||||
] ]
|
||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
|
||||
|
||||
GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
|
||||
}
|
||||
|
|
|
@ -2,4 +2,18 @@ process {
|
|||
|
||||
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
|
||||
|
||||
withName: GATK4_MARKDUPLICATES_SPARK {
|
||||
ext.prefix = { "${meta.id}.bam" }
|
||||
}
|
||||
withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
|
||||
ext.prefix = { "${meta.id}.cram" }
|
||||
}
|
||||
withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
|
||||
ext.args = '--metrics-file test.metrics'
|
||||
ext.prefix = { "${meta.id}.bam" }
|
||||
}
|
||||
|
||||
}
|
||||
// override tests/config/nextflow.config
|
||||
docker.userEmulation = false
|
||||
|
||||
|
|
|
@ -1,25 +1,41 @@
|
|||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark
|
||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bai
|
||||
md5sum: e9c125e82553209933883b4fe2b8d7c2
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
|
||||
- path: output/gatk4/test.metrics
|
||||
md5sum: dc1a09ac6371aab7c50d1a554baa06d3
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
|
||||
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bai
|
||||
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 8187febc6108ffef7f907e89b9c091a4
|
||||
- path: output/gatk4/test.metrics
|
||||
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.cram
|
||||
md5sum: 2271016de5e4199736598f39d12d7587
|
||||
- path: output/gatk4/versions.yml
|
||||
|
||||
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
|
||||
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
|
||||
tags:
|
||||
- gatk4
|
||||
- gatk4/markduplicatesspark
|
||||
files:
|
||||
- path: output/gatk4/test.bam
|
||||
md5sum: 898cb0a6616897d8ada90bab53bf0837
|
||||
- path: output/gatk4/test.metrics
|
||||
contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
|
||||
- path: output/gatk4/versions.yml
|
||||
|
|
Loading…
Reference in a new issue