@ -19,10 +19,11 @@ process BEDTOOLS_SORT {
tuple val(meta), path(bed)
tuple val(meta), path(intervals)
val extension
tuple val(meta), path('*.bed'), emit: bed
tuple val(meta), path("*.${extension}"), emit: sorted
path "versions.yml" , emit: versions
@ -30,9 +31,9 @@ process BEDTOOLS_SORT {
bedtools \\
sort \\
-i $bed \\
-i $intervals \\
$options.args \\
> ${prefix}.bed
> ${prefix}.${extension}
cat <<-END_VERSIONS > versions.yml
@ -15,20 +15,26 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
- intervals:
type: file
description: Input BED file
pattern: "*.{bed}"
description: BED/BEDGRAPH
pattern: "*.{bed,bedGraph}"
- extension:
type: string
description: Extension of the output file, without the leading dot (e.g., "bg", "bedgraph", "txt", "tab", etc.). It is set arbitrarily by the user and should correspond to the file format, which depends on the arguments passed to the tool.
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
- sorted:
type: file
description: Sorted BED file
pattern: "*.{bed}"
description: Sorted output file
pattern: "*.${extension}"
- versions:
type: file
description: File containing software versions
@ -37,3 +43,4 @@ authors:
- "@Emiller88"
- "@sruthipsuresh"
- "@drpatelh"
- "@chris-cheshire"
@ -0,0 +1,78 @@
// Utility functions used in nf-core DSL2 module files
// Extract name of software tool from process name using $task.process
// The process name is split on ':' and the last segment is kept; that segment
// is split on '_' and its first token is returned in lower case.
def getSoftwareName(task_process) {
return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
// Extract name of module from process name using $task.process
// Returns the last ':'-separated segment of the process name, unchanged in case.
def getProcessName(task_process) {
return task_process.tokenize(':')[-1]
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
// Copies the recognised keys from `args` into a fresh Map. Falsy values are replaced
// via the Elvis operator by '' (args, args2, args3, publish_dir, suffix) or [] (publish_by_meta).
// Returns the populated Map.
def initOptions(Map args) {
def Map options = [:]
options.args = args.args ?: ''
options.args2 = args.args2 ?: ''
options.args3 = args.args3 ?: ''
options.publish_by_meta = args.publish_by_meta ?: []
options.publish_dir = args.publish_dir ?: ''
// publish_files is copied as-is (no default): saveFiles distinguishes null from a Map
options.publish_files = args.publish_files
options.suffix = args.suffix ?: ''
return options
// Tidy up and join elements of a list to return a path string
// Entries are trimmed, stripped of leading and trailing '/' characters, and joined with '/'.
def getPathFromList(path_list) {
def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Drop entries that are empty after trimming
paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
return paths.join('/')
// Function to save/publish module results
// Reads the keys `options`, `publish_dir`, `filename`, `meta` and `publish_by_meta` from `args`
// and returns the relative publish path for `args.filename`, or null for versions.yml when the
// NF_CORE_MODULES_TEST environment variable is not set.
def saveFiles(Map args) {
def ioptions = initOptions(args.options)
// Base directory: publish_dir from the options, falling back to args.publish_dir when that is empty
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
// Do not publish versions.yml unless running from pytest workflow
if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
return null
if (ioptions.publish_by_meta) {
// Use the option value when it is a List, otherwise the list supplied by the caller
def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
for (key in key_list) {
if (args.meta && key instanceof String) {
// The key itself is the path element unless meta holds a value for it
def path = key
if (args.meta.containsKey(key)) {
// Boolean meta values become "<key>_<value>"; other values are used directly
path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
// Anything that is not a String at this point is replaced by ''
path = path instanceof String ? path : ''
// NOTE(review): in the lines visible here `path` is never appended to path_list - confirm against the full file
if (ioptions.publish_files instanceof Map) {
// Return a path for the first publish_files entry whose key is a suffix of the filename
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
// Shallow copy so that path_list itself is not modified
def ext_list = path_list.collect()
// NOTE(review): ext.value is not used in the lines visible here - confirm against the full file
return "${getPathFromList(ext_list)}/$args.filename"
} else if (ioptions.publish_files == null) {
// No publish_files filter configured: return a path for every file
return "${getPathFromList(path_list)}/$args.filename"
@ -0,0 +1,50 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
tag "$meta.id"
label 'process_low'
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0"
} else {
container "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0"
tuple val(meta), path(taggedbam)
tuple val(meta), path("*_umi-grouped.bam") , emit: bam
tuple val(meta), path("*_umi_histogram.txt"), emit: histogram
path "versions.yml" , emit: versions
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
mkdir tmp
fgbio \\
--tmp-dir=${PWD}/tmp \\
GroupReadsByUmi \\
-s $strategy \\
${options.args} \\
-i $taggedbam \\
-o ${prefix}_umi-grouped.bam \\
-f ${prefix}_umi_histogram.txt
cat <<-END_VERSIONS > versions.yml
${getSoftwareName(task.process)}: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
@ -0,0 +1,59 @@
name: fgbio_groupreadsbyumi
description: |
Groups reads together that appear to have come from the same original molecule.
Reads are grouped by template, and then templates are sorted by the 5’ mapping positions
of the reads from the template, used from earliest mapping position to latest.
Reads that have the same end positions are then sub-grouped by UMI sequence.
(!) Note: the MQ tag is required on reads with mapped mates (!)
This can be added using samblaster with the optional argument --addMateTags.
- groupreads
- fgbio
- fgbio:
description: A set of tools for working with genomic and high throughput sequencing data, including UMIs
homepage: http://fulcrumgenomics.github.io/fgbio/
documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
tool_dev_url: https://github.com/fulcrumgenomics/fgbio
doi: ""
licence: ['MIT']
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: |
BAM file. Note: the MQ tag is required on reads with mapped mates (!)
pattern: "*.bam"
- strategy:
type: value
description: |
Required argument: defines the UMI assignment strategy.
Must be chosen among: Identity, Edit, Adjacency, Paired.
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: UMI-grouped BAM
pattern: "*.bam"
- histogram:
type: file
description: A text file containing the tag family size counts
pattern: "*.txt"
- "@lescai"
@ -19,7 +19,7 @@ process GATK4_APPLYBQSR {
tuple val(meta), path(bam), path(bai), path(bqsr_table)
tuple val(meta), path(input), path(input_index), path(bqsr_table)
path fasta
path fastaidx
path dict
@ -32,12 +32,18 @@ process GATK4_APPLYBQSR {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def interval = intervals ? "-L ${intervals}" : ""
if (!task.memory) {
log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
gatk ApplyBQSR \\
-R $fasta \\
-I $bam \\
-I $input \\
--bqsr-recal-file $bqsr_table \\
$interval \\
--tmp-dir . \\
-O ${prefix}.bam \\
@ -20,10 +20,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file from alignment
pattern: "*.{bam}"
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- bqsr_table:
type: file
description: Recalibration table from gatk4_baserecalibrator
@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR {
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fastaidx
path dict
@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : ""
def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
if (!task.memory) {
log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
gatk BaseRecalibrator \
-R $fasta \
-I $bam \
-I $input \
$sitesCommand \
$intervalsCommand \
--tmp-dir . \
$options.args \
-O ${prefix}.table
@ -20,10 +20,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file from alignment
pattern: "*.{bam}"
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: file
description: The reference fasta file
@ -57,3 +61,4 @@ output:
- "@yocra3"
- "@FriederikeHanssen"
@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER {
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path dict
path dbsnp
path dbsnp_tbi
path interval
tuple val(meta), path("*.vcf.gz"), emit: vcf
@ -31,6 +34,8 @@ process GATK4_HAPLOTYPECALLER {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def interval_option = interval ? "-L ${interval}" : ""
def dbsnp_option = dbsnp ? "-D ${dbsnp}" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER {
--java-options "-Xmx${avail_mem}g" \\
HaplotypeCaller \\
-R $fasta \\
-I $bam \\
-I $input \\
${dbsnp_option} \\
${interval_option} \\
-O ${prefix}.vcf.gz \\
$options.args \\
--tmp-dir .
cat <<-END_VERSIONS > versions.yml
@ -21,14 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file
pattern: "*.bam"
- bai:
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: Index of BAM file
pattern: "*.bam.bai"
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: file
description: The reference fasta file
@ -41,6 +41,16 @@ input:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- dbsnp:
type: file
description: VCF file containing known sites (optional)
- dbsnp_tbi:
type: file
description: VCF index of dbsnp (optional)
- interval:
type: file
description: Bed file with the genomic regions included in the library (optional)
- meta:
type: map
@ -62,3 +72,4 @@ output:
- "@suzannejin"
- "@FriederikeHanssen"
@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES {
tuple val(meta), path(bam)
tuple val(meta), path(bams)
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , emit: bai
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
// Memory in GB interpolated into the gatk command; stays at 3 when the task declares no memory
def avail_mem = 3
if (!task.memory) {
// Log prefix names this tool (MarkDuplicates) so the message is attributed to the right process
log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
gatk MarkDuplicates \\
--INPUT $bam \\
$bam_list \\
--METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\
--ASSUME_SORT_ORDER coordinate \\
--CREATE_INDEX true \\
--OUTPUT ${prefix}.bam \\
@ -47,3 +47,4 @@ output:
- "@ajodeh-juma"
- "@FriederikeHanssen"
@ -19,9 +19,11 @@ process GATK4_MUTECT2 {
tuple val(meta) , path(bam) , path(bai) , val(which_norm)
tuple val(meta) , path(input) , path(input_index) , val(which_norm)
val run_single
val run_pon
val run_mito
val interval_label
path fasta
path fastaidx
path dict
@ -39,35 +41,34 @@ process GATK4_MUTECT2 {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def inputsList = []
def normalsList = []
def inputsCommand = ''
def panelsCommand = ''
def normalsCommand = ''
def panels_command = ''
def normals_command = ''
bam.each() {a -> inputsList.add(" -I " + a ) }
inputsCommand = inputsList.join( ' ')
def inputs_command = '-I ' + input.join( ' -I ')
if(run_pon) {
panelsCommand = ''
normalsCommand = ''
panels_command = ''
normals_command = ''
} else if(run_single) {
panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
normalsCommand = ''
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
normals_command = ''
} else if(run_mito){
panels_command = "-L ${interval_label} --mitochondria-mode"
normals_command = ''
} else {
panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
which_norm.each() {a -> normalsList.add(" -normal " + a ) }
normalsCommand = normalsList.join( ' ')
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
normals_command = '-normal ' + which_norm.join( ' -normal ')
gatk Mutect2 \\
-R ${fasta} \\
${inputsCommand} \\
${normalsCommand} \\
${panelsCommand} \\
${inputs_command} \\
${normals_command} \\
${panels_command} \\
-O ${prefix}.vcf.gz \\
@ -22,23 +22,34 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- bam:
- input:
type: list
description: list of BAM files
pattern: "*.bam"
- bai:
description: list of BAM files, also able to take CRAM as an input
pattern: "*.{bam,cram}"
- input_index:
type: list
description: list of BAM file indexes
pattern: "*.bam.bai"
description: list of BAM file indexes, also able to take CRAM indexes as an input
pattern: "*.{bam.bai,cram.crai}"
- which_norm:
type: list
description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
pattern: "testN"
- run_single:
type: boolean
description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
pattern: "true/false"
- run_pon:
type: boolean
description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
pattern: "true/false"
- run_mito:
type: boolean
description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
pattern: "true/false"
- interval_label:
type: string
description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode.
pattern: "chrM"
- fasta:
type: file
description: The reference fasta file
@ -0,0 +1,49 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
tag "${name}"
label 'process_long'
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2"
} else {
container "quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2"
path pe_reads
path se_reads
val name
path "${name}.fastq.gz", emit: reads
path "versions.yml" , emit: versions
pe_args = pe_reads ? "--paired" : ""
se_args = se_reads ? "--unpaired-reads ${se_reads}" : ""
files = pe_reads ? pe_reads : se_reads
normalize-by-median.py \\
-M ${task.memory.toGiga()}e9 \\
--gzip ${options.args} \\
-o ${name}.fastq.gz \\
${pe_args} \\
${se_args} \\
cat <<-END_VERSIONS > versions.yml
${getSoftwareName(task.process)}: \$( normalize-by-median.py --version 2>&1 | grep ^khmer | sed 's/^khmer //' )
@ -0,0 +1,39 @@
name: khmer_normalizebymedian
description: Module that calls normalize-by-median.py from khmer. The module can take a mix of paired end (interleaved) and single end reads. If both types are provided, only a single file with single ends is possible.
- digital normalization
- khmer
- khmer:
description: khmer k-mer counting library
homepage: https://github.com/dib-lab/khmer
documentation: https://khmer.readthedocs.io/en/latest/
tool_dev_url: https://github.com/dib-lab/khmer
doi: "https://doi.org/10.12688/f1000research.6924.1"
licence: ['BSD License']
- pe_reads:
type: files
description: Paired-end interleaved fastq files
pattern: "*.{fq,fastq}.gz"
- se_reads:
type: files
description: Single-end fastq files
pattern: "*.{fq,fastq}.gz"
- name:
type: string
description: filename for output file(s); ".fastq.gz" will be appended
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Interleaved fastq files
pattern: "*.{fq,fastq}.gz"
- "@erikrikarddaniel"
@ -19,7 +19,7 @@ process MANTA_GERMLINE {
tuple val(meta), path(cram), path(crai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -39,7 +39,7 @@ process MANTA_GERMLINE {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
configManta.py \
--bam $cram \
--bam $input \
--reference $fasta \
$options_manta \
--runDir manta
@ -23,11 +23,11 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai:
- input_index:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
@ -19,7 +19,7 @@ process MANTA_SOMATIC {
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor)
tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
path fasta
path fai
path target_bed
@ -42,8 +42,8 @@ process MANTA_SOMATIC {
configManta.py \
--tumorBam $cram_tumor \
--normalBam $cram_normal \
--tumorBam $input_tumor \
--normalBam $input_normal \
--reference $fasta \
$options_manta \
--runDir manta
@ -23,19 +23,19 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram_normal:
- input_normal:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_normal:
- input_index_normal:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- cram_tumor:
- input_tumor:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_tumor:
- input_index_tumor:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
@ -19,7 +19,7 @@ process MANTA_TUMORONLY {
tuple val(meta), path(cram), path(crai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -39,7 +39,7 @@ process MANTA_TUMORONLY {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
configManta.py \
--tumorBam $cram \
--tumorBam $input \
--reference $fasta \
$options_manta \
--runDir manta
@ -23,11 +23,11 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai:
- input_index:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
@ -54,7 +54,6 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- candidate_small_indels_vcf:
type: file
description: Gzipped VCF file containing variants
@ -19,16 +19,20 @@ process SAMTOOLS_MERGE {
tuple val(meta), path(bams)
tuple val(meta), path(input_files)
path fasta
tuple val(meta), path("${prefix}.bam"), emit: bam
tuple val(meta), path("${prefix}.bam"), optional:true, emit: bam
tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
path "versions.yml" , emit: versions
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def file_type = input_files[0].getExtension()
def reference = fasta ? "--reference ${fasta}" : ""
samtools merge ${prefix}.bam $bams
samtools merge ${reference} ${prefix}.${file_type} $input_files
cat <<-END_VERSIONS > versions.yml
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
@ -1,5 +1,5 @@
name: samtools_merge
description: Merge BAM file
description: Merge BAM or CRAM file
- merge
- bam
@ -21,20 +21,28 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input_files:
type: file
description: BAM file
description: BAM/CRAM file
pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- merged_bam:
- bam:
type: file
description: BAM file
pattern: "*.{bam}"
- cram:
type: file
description: CRAM file
pattern: "*.{cram}"
- versions:
type: file
description: File containing software versions
@ -43,3 +51,4 @@ authors:
- "@drpatelh"
- "@yuukiiwa"
- "@maxulysse"
- "@FriederikeHanssen"
@ -19,15 +19,17 @@ process SAMTOOLS_STATS {
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
tuple val(meta), path("*.stats"), emit: stats
path "versions.yml" , emit: versions
def reference = fasta ? "--reference ${fasta}" : ""
samtools stats $bam > ${bam}.stats
samtools stats ${reference} ${input} > ${input}.stats
cat <<-END_VERSIONS > versions.yml
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
@ -22,14 +22,18 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- bai:
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: Index for BAM/CRAM/SAM file
pattern: "*.{bai,crai,sai}"
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
- meta:
type: map
@ -46,3 +50,4 @@ output:
pattern: "versions.yml"
- "@drpatelh"
- "@FriederikeHanssen"
@ -19,16 +19,20 @@ process SAMTOOLS_VIEW {
tuple val(meta), path(bam)
tuple val(meta), path(input)
path fasta
tuple val(meta), path("*.bam"), emit: bam
tuple val(meta), path("*.bam") , optional: true, emit: bam
tuple val(meta), path("*.cram"), optional: true, emit: cram
path "versions.yml" , emit: versions
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def reference = fasta ? "--reference ${fasta} -C" : ""
def file_type = input.getExtension()
samtools view $options.args $bam > ${prefix}.bam
samtools view ${reference} $options.args $input > ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
@ -21,10 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
- meta:
type: map
@ -33,8 +37,12 @@ output:
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: filtered/converted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
description: filtered/converted BAM/SAM file
pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions:
type: file
description: File containing software versions
@ -42,3 +50,4 @@ output:
- "@drpatelh"
- "@joseespinosa"
- "@FriederikeHanssen"
@ -19,7 +19,7 @@ process STRELKA_GERMLINE {
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -38,7 +38,7 @@ process STRELKA_GERMLINE {
def regions = target_bed ? "--exome --callRegions ${target_bed}" : ""
configureStrelkaGermlineWorkflow.py \\
--bam $bam \\
--bam $input \\
--referenceFasta $fasta \\
$regions \\
$options.args \\
@ -21,14 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- bam:
- input:
type: file
description: BAM file
pattern: "*.{bam}"
- bai:
description: BAM/CRAM file
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAM index file
pattern: "*.{bai}"
description: BAI/CRAI index file
pattern: "*.{bai,crai}"
- target_bed:
type: file
description: An optional bed file
@ -19,7 +19,7 @@ process STRELKA_SOMATIC {
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
path fasta
path fai
path target_bed
@ -38,8 +38,8 @@ process STRELKA_SOMATIC {
def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : ""
configureStrelkaSomaticWorkflow.py \\
--tumor $cram_tumor \\
--normal $cram_normal \\
--tumor $input_tumor \\
--normal $input_normal \\
--referenceFasta $fasta \\
$options_target_bed \\
$options_manta \\
@ -21,19 +21,19 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram_normal:
- input_normal:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_normal:
- input_index_normal:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- cram_tumor:
- input_tumor:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_tumor:
- input_index_tumor:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS {
ch_versions = Channel.empty()
SAMTOOLS_STATS ( ch_bam_bai )
SAMTOOLS_STATS ( ch_bam_bai, [] )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
SAMTOOLS_FLAGSTAT ( ch_bam_bai )
@ -394,6 +394,10 @@ fgbio/fastqtobam:
- modules/fgbio/fastqtobam/**
- tests/modules/fgbio/fastqtobam/**
- modules/fgbio/groupreadsbyumi/**
- tests/modules/fgbio/groupreadsbyumi/**
- modules/fgbio/sortbam/**
- tests/modules/fgbio/sortbam/**
@ -629,6 +633,10 @@ kallistobustools/ref:
- modules/kallistobustools/ref/**
- tests/modules/kallistobustools/ref/**
- modules/khmer/normalizebymedian/**
- tests/modules/khmer/normalizebymedian/**
- modules/kleborate/**
- tests/modules/kleborate/**
@ -9,5 +9,5 @@ workflow test_bedtools_sort {
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
BEDTOOLS_SORT ( input, "testext" )
@ -4,5 +4,5 @@
- bedtools
- bedtools/sort
- path: ./output/bedtools/test_out.bed
- path: ./output/bedtools/test_out.testext
md5sum: fe4053cf4de3aebbdfc3be2efb125a74
@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FGBIO_GROUPREADSBYUMI } from '../../../../modules/fgbio/groupreadsbyumi/main.nf' addParams( options: [:] )
workflow test_fgbio_groupreadsbyumi {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true) ]
strategy = "Adjacency"
FGBIO_GROUPREADSBYUMI ( input, strategy )
@ -0,0 +1,10 @@
- name: fgbio groupreadsbyumi test_fgbio_groupreadsbyumi
command: nextflow run tests/modules/fgbio/groupreadsbyumi -entry test_fgbio_groupreadsbyumi -c tests/config/nextflow.config
- fgbio
- fgbio/groupreadsbyumi
- path: output/fgbio/test_umi-grouped.bam
md5sum: f1e53fc845fd99a3da172eb8063dff0b
- path: output/fgbio/test_umi_histogram.txt
md5sum: d17fd167b2a765d46e4b01bf08ece01b
@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals {
GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
workflow test_gatk4_applybqsr_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
@ -1,17 +1,26 @@
- name: gatk4 applybqsr test_gatk4_applybqsr
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config
- gatk4
- gatk4/applybqsr
- gatk4
- path: output/gatk4/test.bam
md5sum: dac716c394db5e83c12b44355c098ca7
md5sum: 87a2eabae2b7b41574f966612b5addae
- name: gatk4 applybqsr test_gatk4_applybqsr_intervals
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config
- gatk4
- gatk4/applybqsr
- gatk4
- path: output/gatk4/test.bam
md5sum: 400441dbe5344658580ba0a24ba57069
md5sum: 9c015d3c1dbd9eee793b7386f432b6aa
- name: gatk4 applybqsr test_gatk4_applybqsr_cram
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config
- gatk4/applybqsr
- gatk4
- path: output/gatk4/test.bam
md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7
@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator {
GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
// CRAM variant of the BaseRecalibrator test.
// The input list holds exactly meta, CRAM and CRAI, matching the process's three-element
// input tuple; the recalibration table is an output of this tool (-O ${prefix}.table), not an input.
workflow test_gatk4_baserecalibrator_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
workflow test_gatk4_baserecalibrator_intervals {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
@ -1,17 +1,26 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config
- gatk4
- gatk4/baserecalibrator
- path: output/gatk4/test.table
md5sum: 35d89a3811aa31711fc9815b6b80e6ec
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
- path: output/gatk4/test.table
md5sum: 9ecb5f00a2229291705addc09c0ec231
@ -19,8 +28,8 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller {
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict )
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
// Test workflow: calls GATK4_HAPLOTYPECALLER on CRAM input using the
// homo_sapiens test reference, with the last three positional arguments
// left as empty lists.
// NOTE(review): the closing `]` of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_gatk4_haplotypecaller_cram {
input = [ [ id:'test' ], // meta map
// homo_sapiens illumina entries `test_paired_end_sorted_cram` and `..._cram_crai` (presumably the CRAM and its index)
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
// Reference inputs taken from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// NOTE(review): the three `[]` slots presumably stand for the optional sites,
// sites index and intervals inputs — confirm against the module's input block.
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
// Test workflow: calls GATK4_HAPLOTYPECALLER on CRAM input and additionally
// passes `sites`, `sites_tbi` and `intervals` as the last three arguments.
// NOTE(review): the closing `]` of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_gatk4_haplotypecaller_intervals_dbsnp {
input = [ [ id:'test' ], // meta map
// homo_sapiens illumina entries `test_paired_end_sorted_cram` and `..._cram_crai` (presumably the CRAM and its index)
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
// Reference inputs taken from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Test-data entries `dbsnp_146_hg38_vcf_gz` and `dbsnp_146_hg38_vcf_gz_tbi`.
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
// Test-data entry `genome_bed`, passed as the intervals argument.
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals )
@ -1,13 +1,26 @@
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config
- gatk4
- gatk4/haplotypecaller
- gatk4
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config
- gatk4/haplotypecaller
- gatk4
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c tests/config/nextflow.config
- gatk4/haplotypecaller
- gatk4
- path: output/gatk4/test.vcf.gz
should_exist: true
- 'MT192765.1'
- '54.60'
- '37.32'
- path: output/gatk4/test.vcf.gz.tbi
@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates {
workflow test_gatk4_markduplicates_multiple_bams {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
@ -1,8 +1,23 @@
- name: gatk4 markduplicates test_gatk4_markduplicates
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config
- gatk4
- gatk4/markduplicates
- gatk4
- path: output/gatk4/test.bai
md5sum: e9c125e82553209933883b4fe2b8d7c2
- path: output/gatk4/test.bam
md5sum: 3b6facab3afbacfa08a7a975efbd2c6b
md5sum: bda9a7bf5057f2288ed70be3eb8a753f
- path: output/gatk4/test.metrics
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config
- gatk4/markduplicates
- gatk4
- path: output/gatk4/test.bai
md5sum: 93cebe29e7cca2064262b739235cca9b
- path: output/gatk4/test.bam
md5sum: dcd6f584006b04141fb787001a8ecacc
- path: output/gatk4/test.metrics
@ -3,6 +3,8 @@
nextflow.enable.dsl = 2
include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [:] )
// Temporary alias that runs Mutect2 with --mitochondria-mode: the increased sensitivity allows some tumor_normal variants to be detected while the old test data is still in use.
// It will be removed once new test data for sarek is available.
include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [args: '--mitochondria-mode'] )
workflow test_gatk4_mutect2_tumor_normal_pair {
input = [ [ id:'test'], // meta map
@ -12,6 +14,8 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
run_single = false
run_pon = false
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -20,7 +24,7 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
workflow test_gatk4_mutect2_tumor_single {
@ -31,6 +35,8 @@ workflow test_gatk4_mutect2_tumor_single {
run_single = true
run_pon = false
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -39,7 +45,28 @@ workflow test_gatk4_mutect2_tumor_single {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
// Test workflow: calls GATK4_MUTECT2 with CRAM input, `run_single` enabled
// and `run_pon` / `run_mito` disabled.
// NOTE(review): only three elements of `input` are visible in this excerpt and
// its closing `]` (and the workflow's closing `}`) are missing — confirm the
// tuple arity against the module's input declaration.
workflow test_gatk4_mutect2_cram_input {
input = [ [ id:'test'], // meta map
// Single-element lists holding the `test2_paired_end_recalibrated_sorted_cram` entry and its `_crai` counterpart.
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)],
// Mode flags forwarded positionally to the process.
run_single = true
run_pon = false
run_mito = false
// Empty list: no interval label is supplied in this test.
interval_label = []
// Reference inputs taken from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Germline resource: `gnomad_r2_1_1_vcf_gz` entry and its `_tbi` counterpart.
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
germline_resource_idx = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
// Panel of normals: `mills_and_1000g_indels_vcf_gz` entry and its `_tbi` counterpart.
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
workflow test_gatk4_mutect2_generate_pon {
@ -50,6 +77,8 @@ workflow test_gatk4_mutect2_generate_pon {
run_single = false
run_pon = true
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -58,5 +87,27 @@ workflow test_gatk4_mutect2_generate_pon {
panel_of_normals = []
panel_of_normals_idx = []
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
// Mitochondria mode would ideally be tested with mitochondrial test data. Since the mitochondria settings only increase detection sensitivity, the chr22 data (already a small dataset) is used as a stand-in.
// The extra variants detected compared to generate_pon show that the mode is working.
// Test workflow: calls GATK4_MUTECT2 with `run_mito` enabled, an interval
// label of 'chr22', and no germline resource or panel of normals.
// NOTE(review): only three elements of `input` are visible in this excerpt and
// its closing `]` (and the workflow's closing `}`) are missing — confirm the
// tuple arity against the module's input declaration.
workflow test_gatk4_mutect2_mitochondria {
input = [ [ id:'test'], // meta map
// Single-element lists holding the `test_paired_end_recalibrated_sorted_bam` entry and its `_bai` counterpart.
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
// Mode flags forwarded positionally to the process; only run_mito is set.
run_single = false
run_pon = false
run_mito = true
// Unlike the other mutect2 tests shown here, a string label is passed instead of [].
interval_label = 'chr22'
// Reference inputs taken from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Germline resource and panel of normals are passed as empty lists in this test.
germline_resource = []
germline_resource_idx = []
panel_of_normals = []
panel_of_normals_idx = []
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
@ -7,7 +7,7 @@
- path: output/gatk4/test.f1r2.tar.gz
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: 6ecb874e6a95aa48233587b876c2a7a9
md5sum: 887d54e393510f1d0aa2c33bc6155161
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_tumor_single
@ -18,7 +18,18 @@
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: e7ef613f7d158b8a0adf44abe5db2029
md5sum: 106c5828b02b906c97922618b6072169
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_cram_input
command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c tests/config/nextflow.config
- gatk4
- gatk4/mutect2
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: 106c5828b02b906c97922618b6072169
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_generate_pon
@ -31,3 +42,14 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: 4f77301a125913170b8e9e7828b4ca3f
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria
command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c tests/config/nextflow.config
- gatk4
- gatk4/mutect2
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: fc6ea14ca2da346babe78161beea28c9
- path: output/gatk4/test.vcf.gz.tbi
@ -0,0 +1,85 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQTK_MERGEPE } from '../../../../modules/seqtk/mergepe/main.nf' addParams( options: [:] )
include { KHMER_NORMALIZEBYMEDIAN } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [:] )
include { KHMER_NORMALIZEBYMEDIAN as KHMER_NORMALIZEBYMEDIAN_ARGS } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [args: '-C 20 -k 32'] )
// Test workflow: feeds KHMER_NORMALIZEBYMEDIAN with paired-end reads only
// (second argument is an empty list) and the output name 'only_pe'.
// NOTE(review): `pe_reads` is never used and no SEQTK_MERGEPE invocation is
// visible in this excerpt although `SEQTK_MERGEPE.out` is read below; the
// closing brackets/brace are also missing — confirm against the full file.
workflow test_khmer_normalizebymedian_only_pe {
pe_reads = [
[ id:'khmer_test', single_end:false ], // meta map
// sarscov2 illumina entries `test_1_fastq_gz` and `test_2_fastq_gz`
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
// `collect { it[1] }` gathers element [1] of every item emitted on SEQTK_MERGEPE.out.reads.
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, [], 'only_pe' )
// Test workflow: feeds KHMER_NORMALIZEBYMEDIAN with single-end reads only
// (first argument is an empty list) and the output name 'only_se'.
// NOTE(review): the closing `]` of `se_reads` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_khmer_normalizebymedian_only_se {
se_reads = [
// sarscov2 illumina entries `test_1_fastq_gz` and `test_2_fastq_gz`, both passed in the single-end slot
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
KHMER_NORMALIZEBYMEDIAN ( [], se_reads, 'only_se' )
// Test workflow: feeds KHMER_NORMALIZEBYMEDIAN with both paired-end and
// single-end reads and the output name 'mixed'.
// NOTE(review): `pe_reads` is never used and no SEQTK_MERGEPE invocation is
// visible in this excerpt although `SEQTK_MERGEPE.out` is read below; the
// closing brackets/brace are also missing — confirm against the full file.
workflow test_khmer_normalizebymedian_mixed {
pe_reads = [
[ id:'khmer_test', single_end:false ], // meta map
// sarscov2 illumina entries `test_1_fastq_gz` and `test_2_fastq_gz`
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
// Single-end input: the `test_1_fastq_gz` entry on its own.
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
// This test uses `map { it[1] }` (element [1] of each emitted item) where the sibling tests use `collect`.
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.map { it[1] }, se_reads, 'mixed' )
// Test workflow: feeds KHMER_NORMALIZEBYMEDIAN with two paired-end samples
// (ids 'khmer_test0' and 'khmer_test1') plus single-end reads, using the
// output name 'multiple_pe'.
// NOTE(review): `pe_reads` is never used and no SEQTK_MERGEPE invocation is
// visible in this excerpt although `SEQTK_MERGEPE.out` is read below; the
// brackets separating the two samples are also missing — confirm against the
// full file.
workflow test_khmer_normalizebymedian_multiple_pe {
pe_reads = [
// First sample.
[ id:'khmer_test0', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
// Second sample; it reuses the same two FASTQ entries as the first.
[ id:'khmer_test1', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
// Single-end input: the `test_1_fastq_gz` entry on its own.
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
// `collect { it[1] }` gathers element [1] of every item emitted on SEQTK_MERGEPE.out.reads.
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'multiple_pe' )
// Test workflow: same inputs as the mixed paired-end/single-end case but run
// through the KHMER_NORMALIZEBYMEDIAN_ARGS alias, with the output name 'args'.
// NOTE(review): the alias is presumably the include configured with
// `args: '-C 20 -k 32'` — confirm against the include block at the top of the file.
// NOTE(review): `pe_reads` is never used and no SEQTK_MERGEPE invocation is
// visible in this excerpt although `SEQTK_MERGEPE.out` is read below; the
// closing brackets/brace are also missing — confirm against the full file.
workflow test_khmer_normalizebymedian_args {
pe_reads = [
[ id:'khmer_test0', single_end:false ], // meta map
// sarscov2 illumina entries `test_1_fastq_gz` and `test_2_fastq_gz`
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
// Single-end input: the `test_1_fastq_gz` entry on its own.
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
// `collect { it[1] }` gathers element [1] of every item emitted on SEQTK_MERGEPE.out.reads.
KHMER_NORMALIZEBYMEDIAN_ARGS ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'args' )
@ -0,0 +1,42 @@
# nf-core modules create-test-yml khmer/normalizebymedian
- name: khmer normalizebymedian only pe reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_pe -c tests/config/nextflow.config
- khmer
- khmer/normalizebymedian
- path: output/khmer/only_pe.fastq.gz
# The md5sum is not stable, even locally with Docker (the tool itself gzips the output), so it is not checked.
#md5sum: 75e05f2e80cf4bd0b534d4b73f7c059c
- name: khmer normalizebymedian only se reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_se -c tests/config/nextflow.config
- khmer
- khmer/normalizebymedian
- path: output/khmer/only_se.fastq.gz
- name: khmer normalizebymedian mixed reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_mixed -c tests/config/nextflow.config
- khmer
- khmer/normalizebymedian
- path: output/khmer/mixed.fastq.gz
- name: khmer normalizebymedian multiple pe reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_multiple_pe -c tests/config/nextflow.config
- khmer
- khmer/normalizebymedian
- path: output/khmer/multiple_pe.fastq.gz
- name: khmer normalizebymedian args
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_args -c tests/config/nextflow.config
- khmer
- khmer/normalizebymedian
- path: output/khmer/args.fastq.gz
@ -11,5 +11,15 @@ workflow test_samtools_merge {
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
SAMTOOLS_MERGE ( input, [] )
// Test workflow: calls SAMTOOLS_MERGE on two CRAM files and passes the
// reference FASTA as the second argument.
// NOTE(review): the closing brackets of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_samtools_merge_cram {
input = [ [ id: 'test' ], // meta map
// homo_sapiens illumina entries `test_paired_end_recalibrated_sorted_cram` and `test2_paired_end_recalibrated_sorted_cram`
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
// Reference FASTA from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_MERGE ( input, fasta )
@ -1,7 +1,15 @@
- name: samtools merge
command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
- name: samtools merge test_samtools_merge
command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
- samtools
- samtools/merge
- samtools
- path: output/samtools/test_merged.bam
- name: samtools merge test_samtools_merge_cram
command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config
- samtools/merge
- samtools
- path: output/samtools/test_merged.cram
@ -10,5 +10,15 @@ workflow test_samtools_stats {
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
SAMTOOLS_STATS ( input, [])
// Test workflow: calls SAMTOOLS_STATS on a CRAM file and passes the
// reference FASTA as the second argument.
// NOTE(review): the closing `]` of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_samtools_stats_cram {
input = [ [ id: 'test' ], // meta map
// `test_paired_end_recalibrated_sorted_cram` entry and its `_crai` counterpart (presumably the index)
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
// Reference FASTA from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_STATS ( input, fasta )
@ -1,8 +1,17 @@
- name: samtools stats
command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
- name: samtools stats test_samtools_stats
command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
- samtools
- samtools/stats
- path: ./output/samtools/test.paired_end.sorted.bam.stats
- path: output/samtools/test.paired_end.sorted.bam.stats
md5sum: a7f36cf11fd3bf97e0a0ae29c0627296
- name: samtools stats test_samtools_stats_cram
command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config
- samtools
- samtools/stats
- path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats
md5sum: bd55a1da30028403f4b66dacf7a2a20e
@ -7,8 +7,17 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa
// Test workflow: calls SAMTOOLS_VIEW on a BAM file with an empty list as
// the second argument.
// NOTE(review): the closing `]` of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_samtools_view {
input = [ [ id:'test', single_end:false ], // meta map
// sarscov2 illumina entry `test_paired_end_bam`
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
SAMTOOLS_VIEW ( input, [] )
// Test workflow: calls SAMTOOLS_VIEW on a CRAM file and passes the
// reference FASTA as the second argument.
// NOTE(review): the closing `]` of `input` and the closing `}` of the
// workflow are not visible in this excerpt.
workflow test_samtools_view_cram {
input = [ [ id: 'test' ], // meta map
// `test_paired_end_recalibrated_sorted_cram` entry and its `_crai` counterpart (presumably the index)
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
// Reference FASTA from the homo_sapiens genome test data.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_VIEW ( input, fasta )
@ -1,8 +1,16 @@
- name: samtools view
- name: samtools view test_samtools_view
command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config
- samtools
- samtools/view
- samtools
- path: output/samtools/test.bam
md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13
- name: samtools view test_samtools_view_cram
command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config
- samtools/view
- samtools
- path: output/samtools/test.cram
Reference in a new issue