Merge remote-tracking branch 'nf-core/master'

This commit is contained in:
Chris Cheshire 2021-10-29 13:24:25 +01:00
commit 2cfd15e0f9
58 changed files with 1006 additions and 153 deletions

View file

@ -19,20 +19,21 @@ process BEDTOOLS_SORT {
}
input:
tuple val(meta), path(bed)
tuple val(meta), path(intervals)
val extension
output:
tuple val(meta), path('*.bed'), emit: bed
path "versions.yml" , emit: versions
tuple val(meta), path("*.${extension}"), emit: sorted
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
"""
bedtools \\
sort \\
-i $bed \\
-i $intervals \\
$options.args \\
> ${prefix}.bed
> ${prefix}.${extension}
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:

View file

@ -15,20 +15,26 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
- intervals:
type: file
description: Input BED file
pattern: "*.{bed}"
description: BED/BEDGRAPH
pattern: "*.{bed,bedGraph}"
- extension:
type: string
description: Extension of the output file, given without the leading dot (e.g., "bg", "bedgraph", "txt", "tab", etc.). It is set arbitrarily by the user and should correspond to the file format, which depends on the arguments passed to the tool.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
- sorted:
type: file
description: Sorted BED file
pattern: "*.{bed}"
description: Sorted output file
pattern: "*.${extension}"
- versions:
type: file
description: File containing software versions
@ -37,3 +43,4 @@ authors:
- "@Emiller88"
- "@sruthipsuresh"
- "@drpatelh"
- "@chris-cheshire"

View file

@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Extract name of software tool from process name using $task.process
// e.g. 'NFCORE:WORKFLOW:FGBIO_GROUPREADSBYUMI' -> 'fgbio'
//
def getSoftwareName(task_process) {
    // Last ':'-separated component is the bare process name
    def process_name = task_process.tokenize(':')[-1]
    // Text before the first underscore is the tool name
    def tool_name = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
//
// Extract name of module from process name using $task.process
// e.g. 'NFCORE:WORKFLOW:SAMTOOLS_MERGE' -> 'SAMTOOLS_MERGE'
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
// Missing or falsy entries fall back to '' (or [] for publish_by_meta); publish_files is copied through untouched
//
def initOptions(Map args) {
    return [
        args            : args.args            ?: '',
        args2           : args.args2           ?: '',
        args3           : args.args3           ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir     ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix          ?: ''
    ]
}
//
// Tidy up and join elements of a list to return a path string
// Null, empty and whitespace-only entries are skipped; the remaining entries are
// stripped of surrounding whitespace and of leading/trailing slashes, then joined with '/'
//
def getPathFromList(path_list) {
    // Rely on Groovy truth (null and '' are both false) so that a null entry is
    // dropped instead of triggering a method call on a null reference
    def paths = path_list.findAll { item -> item?.trim() } // Remove null/empty entries
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace plus leading and trailing slashes
    return paths.join('/')
}
//
// Function to save/publish module results
// Builds the publish path for args.filename from the publish directory, optional
// meta-derived sub-directories and optional per-extension sub-directories.
// Returns null when the file should not be published.
//
def saveFiles(Map args) {
    def ioptions  = initOptions(args.options)
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless running from pytest workflow
    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // Append one path component per requested meta key
    if (ioptions.publish_by_meta) {
        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
        for (key in key_list) {
            if (!args.meta || !(key instanceof String)) {
                continue
            }
            // Default to the key itself; use the meta value when the key is present
            def path = key
            if (args.meta.containsKey(key)) {
                def meta_value = args.meta[key]
                path = meta_value instanceof Boolean ? "${key}_${meta_value}".toString() : meta_value
            }
            // Non-String values contribute an empty component
            path_list.add(path instanceof String ? path : '')
        }
    }

    if (ioptions.publish_files instanceof Map) {
        // First extension that matches the filename decides the sub-directory
        def matched = ioptions.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            def ext_list = path_list + [ matched.value ]
            return "${getPathFromList(ext_list)}/$args.filename"
        }
        return null
    }
    if (ioptions.publish_files == null) {
        return "${getPathFromList(path_list)}/$args.filename"
    }
    return null
}

View file

@ -0,0 +1,50 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
// Runs `fgbio GroupReadsByUmi` on the input BAM with the given UMI assignment strategy,
// emitting the grouped BAM, the histogram text file and a versions.yml file.
process FGBIO_GROUPREADSBYUMI {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0"
    } else {
        container "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0"
    }

    input:
    tuple val(meta), path(taggedbam)
    val(strategy)

    output:
    tuple val(meta), path("*_umi-grouped.bam")  , emit: bam
    tuple val(meta), path("*_umi_histogram.txt"), emit: histogram
    path "versions.yml"                         , emit: versions

    script:
    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // PWD is escaped so that Bash expands it inside the task at run time; left
    // unescaped it would be interpolated by Nextflow/Groovy while rendering the
    // script and might not refer to the directory where `mkdir tmp` runs.
    """
    mkdir tmp

    fgbio \\
        --tmp-dir=\${PWD}/tmp \\
        GroupReadsByUmi \\
        -s $strategy \\
        ${options.args} \\
        -i $taggedbam \\
        -o ${prefix}_umi-grouped.bam \\
        -f ${prefix}_umi_histogram.txt

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,59 @@
name: fgbio_groupreadsbyumi
description: |
Groups reads together that appear to have come from the same original molecule.
Reads are grouped by template, and then templates are sorted by the 5' mapping positions
of the reads from the template, used from earliest mapping position to latest.
Reads that have the same end positions are then sub-grouped by UMI sequence.
(!) Note: the MQ tag is required on reads with mapped mates (!)
This can be added using samblaster with the optional argument --addMateTags.
keywords:
- UMI
- groupreads
- fgbio
tools:
- fgbio:
description: A set of tools for working with genomic and high throughput sequencing data, including UMIs
homepage: http://fulcrumgenomics.github.io/fgbio/
documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
tool_dev_url: https://github.com/fulcrumgenomics/fgbio
doi: ""
licence: ['MIT']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: |
BAM file. Note: the MQ tag is required on reads with mapped mates (!)
pattern: "*.bam"
- strategy:
type: value
description: |
Required argument: defines the UMI assignment strategy.
Must be chosen among: Identity, Edit, Adjacency, Paired.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: UMI-grouped BAM
pattern: "*.bam"
- histogram:
type: file
description: A text file containing the tag family size counts
pattern: "*.txt"
authors:
- "@lescai"

View file

@ -19,7 +19,7 @@ process GATK4_APPLYBQSR {
}
input:
tuple val(meta), path(bam), path(bai), path(bqsr_table)
tuple val(meta), path(input), path(input_index), path(bqsr_table)
path fasta
path fastaidx
path dict
@ -32,12 +32,18 @@ process GATK4_APPLYBQSR {
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def interval = intervals ? "-L ${intervals}" : ""
if (!task.memory) {
log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk ApplyBQSR \\
-R $fasta \\
-I $bam \\
-I $input \\
--bqsr-recal-file $bqsr_table \\
$interval \\
--tmp-dir . \\
-O ${prefix}.bam \\
$options.args

View file

@ -20,10 +20,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file from alignment
pattern: "*.{bam}"
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- bqsr_table:
type: file
description: Recalibration table from gatk4_baserecalibrator

View file

@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR {
}
input:
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fastaidx
path dict
@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : ""
def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
if (!task.memory) {
log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk BaseRecalibrator \
-R $fasta \
-I $bam \
-I $input \
$sitesCommand \
$intervalsCommand \
--tmp-dir . \
$options.args \
-O ${prefix}.table

View file

@ -20,10 +20,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file from alignment
pattern: "*.{bam}"
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: file
description: The reference fasta file
@ -57,3 +61,4 @@ output:
authors:
- "@yocra3"
- "@FriederikeHanssen"

View file

@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER {
}
input:
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path dict
path dbsnp
path dbsnp_tbi
path interval
output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER {
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def avail_mem = 3
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def interval_option = interval ? "-L ${interval}" : ""
def dbsnp_option = dbsnp ? "-D ${dbsnp}" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER {
--java-options "-Xmx${avail_mem}g" \\
HaplotypeCaller \\
-R $fasta \\
-I $bam \\
-I $input \\
${dbsnp_option} \\
${interval_option} \\
-O ${prefix}.vcf.gz \\
$options.args
$options.args \\
--tmp-dir .
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:

View file

@ -21,14 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM file
pattern: "*.bam"
- bai:
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: Index of BAM file
pattern: "*.bam.bai"
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: file
description: The reference fasta file
@ -41,6 +41,16 @@ input:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- dbsnp:
type: file
description: VCF file containing known sites (optional)
- dbsnp_tbi:
type: file
description: VCF index of dbsnp (optional)
- interval:
type: file
description: Bed file with the genomic regions included in the library (optional)
output:
- meta:
type: map
@ -62,3 +72,4 @@ output:
authors:
- "@suzannejin"
- "@FriederikeHanssen"

View file

@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES {
}
input:
tuple val(meta), path(bam)
tuple val(meta), path(bams)
output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , emit: bai
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
def avail_mem = 3
if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk MarkDuplicates \\
--INPUT $bam \\
$bam_list \\
--METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\
--ASSUME_SORT_ORDER coordinate \\
--CREATE_INDEX true \\
--OUTPUT ${prefix}.bam \\
$options.args

View file

@ -47,3 +47,4 @@ output:
authors:
- "@ajodeh-juma"
- "@FriederikeHanssen"

View file

@ -19,9 +19,11 @@ process GATK4_MUTECT2 {
}
input:
tuple val(meta) , path(bam) , path(bai) , val(which_norm)
val run_single
val run_pon
tuple val(meta) , path(input) , path(input_index) , val(which_norm)
val run_single
val run_pon
val run_mito
val interval_label
path fasta
path fastaidx
path dict
@ -39,35 +41,34 @@ process GATK4_MUTECT2 {
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def inputsList = []
def normalsList = []
def inputsCommand = ''
def panelsCommand = ''
def normalsCommand = ''
def panels_command = ''
def normals_command = ''
bam.each() {a -> inputsList.add(" -I " + a ) }
inputsCommand = inputsList.join( ' ')
def inputs_command = '-I ' + input.join( ' -I ')
if(run_pon) {
panelsCommand = ''
normalsCommand = ''
panels_command = ''
normals_command = ''
} else if(run_single) {
panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
normalsCommand = ''
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
normals_command = ''
} else if(run_mito){
panels_command = "-L ${interval_label} --mitochondria-mode"
normals_command = ''
} else {
panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
which_norm.each() {a -> normalsList.add(" -normal " + a ) }
normalsCommand = normalsList.join( ' ')
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
normals_command = '-normal ' + which_norm.join( ' -normal ')
}
"""
gatk Mutect2 \\
-R ${fasta} \\
${inputsCommand} \\
${normalsCommand} \\
${panelsCommand} \\
${inputs_command} \\
${normals_command} \\
${panels_command} \\
-O ${prefix}.vcf.gz \\
$options.args

View file

@ -22,23 +22,34 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- bam:
- input:
type: list
description: list of BAM files
pattern: "*.bam"
- bai:
description: list of BAM files, also able to take CRAM as an input
pattern: "*.{bam,cram}"
- input_index:
type: list
description: list of BAM file indexes
pattern: "*.bam.bai"
description: list of BAM file indexes, also able to take CRAM indexes as an input
pattern: "*.{bam.bai,cram.crai}"
- which_norm:
type: list
description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
pattern: "testN"
- run_single:
type: boolean
description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
pattern: "true/false"
- run_pon:
type: boolean
description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
pattern: "true/false"
- run_mito:
type: boolean
description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
pattern: "true/false"
- interval_label:
type: string
description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode.
pattern: "chrM"
- fasta:
type: file
description: The reference fasta file

View file

@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Extract name of software tool from process name using $task.process
// e.g. 'NFCORE:WORKFLOW:KHMER_NORMALIZEBYMEDIAN' -> 'khmer'
//
def getSoftwareName(task_process) {
    // Last ':'-separated component is the bare process name
    def process_name = task_process.tokenize(':')[-1]
    // Text before the first underscore is the tool name
    def tool_name = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
//
// Extract name of module from process name using $task.process
// e.g. 'NFCORE:WORKFLOW:KHMER_NORMALIZEBYMEDIAN' -> 'KHMER_NORMALIZEBYMEDIAN'
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
// Missing or falsy entries fall back to '' (or [] for publish_by_meta); publish_files is copied through untouched
//
def initOptions(Map args) {
    return [
        args            : args.args            ?: '',
        args2           : args.args2           ?: '',
        args3           : args.args3           ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir     ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix          ?: ''
    ]
}
//
// Tidy up and join elements of a list to return a path string
// Null, empty and whitespace-only entries are skipped; the remaining entries are
// stripped of surrounding whitespace and of leading/trailing slashes, then joined with '/'
//
def getPathFromList(path_list) {
    // Rely on Groovy truth (null and '' are both false) so that a null entry is
    // dropped instead of triggering a method call on a null reference
    def paths = path_list.findAll { item -> item?.trim() } // Remove null/empty entries
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace plus leading and trailing slashes
    return paths.join('/')
}
//
// Function to save/publish module results
// Builds the publish path for args.filename from the publish directory, optional
// meta-derived sub-directories and optional per-extension sub-directories.
// Returns null when the file should not be published.
//
def saveFiles(Map args) {
    def ioptions  = initOptions(args.options)
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless running from pytest workflow
    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // Append one path component per requested meta key
    if (ioptions.publish_by_meta) {
        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
        for (key in key_list) {
            if (!args.meta || !(key instanceof String)) {
                continue
            }
            // Default to the key itself; use the meta value when the key is present
            def path = key
            if (args.meta.containsKey(key)) {
                def meta_value = args.meta[key]
                path = meta_value instanceof Boolean ? "${key}_${meta_value}".toString() : meta_value
            }
            // Non-String values contribute an empty component
            path_list.add(path instanceof String ? path : '')
        }
    }

    if (ioptions.publish_files instanceof Map) {
        // First extension that matches the filename decides the sub-directory
        def matched = ioptions.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            def ext_list = path_list + [ matched.value ]
            return "${getPathFromList(ext_list)}/$args.filename"
        }
        return null
    }
    if (ioptions.publish_files == null) {
        return "${getPathFromList(path_list)}/$args.filename"
    }
    return null
}

View file

@ -0,0 +1,49 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
params.options = [:]
options = initOptions(params.options)
// Runs khmer's normalize-by-median.py on paired-end and/or single-end reads and
// writes a single gzipped FASTQ named after `name`, plus a versions.yml file.
process KHMER_NORMALIZEBYMEDIAN {
    tag "${name}"
    label 'process_long'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }

    conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2"
    } else {
        container "quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2"
    }

    input:
    path pe_reads
    path se_reads
    val  name

    output:
    path "${name}.fastq.gz", emit: reads
    path "versions.yml"    , emit: versions

    script:
    pe_args = pe_reads ? "--paired" : ""
    se_args = se_reads ? "--unpaired-reads ${se_reads}" : ""
    // Paired-end files are the positional input when present, otherwise the single-end files
    files   = pe_reads ?: se_reads
    // task.memory is null when no memory directive is configured; only pass -M
    // when it is known so the script does not fail on a null dereference and the
    // tool falls back to its own memory setting.
    def mem_args = task.memory ? "-M ${task.memory.toGiga()}e9" : ""
    """
    normalize-by-median.py \\
        ${mem_args} \\
        --gzip ${options.args} \\
        -o ${name}.fastq.gz \\
        ${pe_args} \\
        ${se_args} \\
        ${files}

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$( normalize-by-median.py --version 2>&1 | grep ^khmer | sed 's/^khmer //' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,39 @@
name: khmer_normalizebymedian
description: Module that calls normalize-by-median.py from khmer. The module can take a mix of paired end (interleaved) and single end reads. If both types are provided, only a single file with single ends is possible.
keywords:
- digital normalization
- khmer
tools:
- khmer:
description: khmer k-mer counting library
homepage: https://github.com/dib-lab/khmer
documentation: https://khmer.readthedocs.io/en/latest/
tool_dev_url: https://github.com/dib-lab/khmer
doi: "https://doi.org/10.12688/f1000research.6924.1"
licence: ['BSD License']
input:
- pe_reads:
type: files
description: Paired-end interleaved fastq files
pattern: "*.{fq,fastq}.gz"
- se_reads:
type: files
description: Single-end fastq files
pattern: "*.{fq,fastq}.gz"
- name:
type: string
description: filename for output file(s); ".fastq.gz" will be appended
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Interleaved fastq files
pattern: "*.{fq,fastq}.gz"
authors:
- "@erikrikarddaniel"

View file

@ -19,7 +19,7 @@ process MANTA_GERMLINE {
}
input:
tuple val(meta), path(cram), path(crai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -39,7 +39,7 @@ process MANTA_GERMLINE {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
"""
configManta.py \
--bam $cram \
--bam $input \
--reference $fasta \
$options_manta \
--runDir manta

View file

@ -23,11 +23,11 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai:
- input_index:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"

View file

@ -19,7 +19,7 @@ process MANTA_SOMATIC {
}
input:
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor)
tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
path fasta
path fai
path target_bed
@ -42,8 +42,8 @@ process MANTA_SOMATIC {
"""
configManta.py \
--tumorBam $cram_tumor \
--normalBam $cram_normal \
--tumorBam $input_tumor \
--normalBam $input_normal \
--reference $fasta \
$options_manta \
--runDir manta

View file

@ -23,19 +23,19 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram_normal:
- input_normal:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_normal:
- input_index_normal:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- cram_tumor:
- input_tumor:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_tumor:
- input_index_tumor:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"

View file

@ -19,7 +19,7 @@ process MANTA_TUMORONLY {
}
input:
tuple val(meta), path(cram), path(crai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -39,7 +39,7 @@ process MANTA_TUMORONLY {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
"""
configManta.py \
--tumorBam $cram \
--tumorBam $input \
--reference $fasta \
$options_manta \
--runDir manta

View file

@ -23,11 +23,11 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai:
- input_index:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
@ -54,7 +54,6 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- candidate_small_indels_vcf:
type: file
description: Gzipped VCF file containing variants

View file

@ -19,16 +19,20 @@ process SAMTOOLS_MERGE {
}
input:
tuple val(meta), path(bams)
tuple val(meta), path(input_files)
path fasta
output:
tuple val(meta), path("${prefix}.bam"), emit: bam
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}.bam"), optional:true, emit: bam
tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
path "versions.yml" , emit: versions
script:
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def file_type = input_files[0].getExtension()
def reference = fasta ? "--reference ${fasta}" : ""
"""
samtools merge ${prefix}.bam $bams
samtools merge ${reference} ${prefix}.${file_type} $input_files
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -1,5 +1,5 @@
name: samtools_merge
description: Merge BAM file
description: Merge BAM or CRAM file
keywords:
- merge
- bam
@ -21,20 +21,28 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input_files:
type: file
description: BAM file
description: BAM/CRAM file
pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- merged_bam:
- bam:
type: file
description: BAM file
pattern: "*.{bam}"
- cram:
type: file
description: CRAM file
pattern: "*.{cram}"
- versions:
type: file
description: File containing software versions
@ -43,3 +51,4 @@ authors:
- "@drpatelh"
- "@yuukiiwa "
- "@maxulysse"
- "@FriederikeHanssen"

View file

@ -19,15 +19,17 @@ process SAMTOOLS_STATS {
}
input:
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
output:
tuple val(meta), path("*.stats"), emit: stats
path "versions.yml" , emit: versions
script:
def reference = fasta ? "--reference ${fasta}" : ""
"""
samtools stats $bam > ${bam}.stats
samtools stats ${reference} ${input} > ${input}.stats
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -22,14 +22,18 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- bai:
type: file
description: Index for BAM/CRAM/SAM file
pattern: "*.{bai,crai,sai}"
- input:
type: file
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
@ -46,3 +50,4 @@ output:
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@FriederikeHanssen"

View file

@ -19,16 +19,20 @@ process SAMTOOLS_VIEW {
}
input:
tuple val(meta), path(bam)
tuple val(meta), path(input)
path fasta
output:
tuple val(meta), path("*.bam"), emit: bam
path "versions.yml" , emit: versions
tuple val(meta), path("*.bam") , optional: true, emit: bam
tuple val(meta), path("*.cram"), optional: true, emit: cram
path "versions.yml" , emit: versions
script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def reference = fasta ? "--reference ${fasta} -C" : ""
def file_type = input.getExtension()
"""
samtools view $options.args $bam > ${prefix}.bam
samtools view ${reference} $options.args $input > ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -21,10 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output:
- meta:
type: map
@ -33,8 +37,12 @@ output:
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: filtered/converted BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
description: filtered/converted BAM/SAM file
pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions:
type: file
description: File containing software versions
@ -42,3 +50,4 @@ output:
authors:
- "@drpatelh"
- "@joseespinosa"
- "@FriederikeHanssen"

View file

@ -19,7 +19,7 @@ process STRELKA_GERMLINE {
}
input:
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(input), path(input_index)
path fasta
path fai
path target_bed
@ -38,7 +38,7 @@ process STRELKA_GERMLINE {
def regions = target_bed ? "--exome --callRegions ${target_bed}" : ""
"""
configureStrelkaGermlineWorkflow.py \\
--bam $bam \\
--bam $input \\
--referenceFasta $fasta \\
$regions \\
$options.args \\

View file

@ -21,14 +21,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- bam:
- input:
type: file
description: BAM file
pattern: "*.{bam}"
- bai:
description: BAM/CRAM file
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAM index file
pattern: "*.{bai}"
description: BAI/CRAI index file
pattern: "*.{bai,crai}"
- target_bed:
type: file
description: An optional bed file

View file

@ -19,7 +19,7 @@ process STRELKA_SOMATIC {
}
input:
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
path fasta
path fai
path target_bed
@ -38,8 +38,8 @@ process STRELKA_SOMATIC {
def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : ""
"""
configureStrelkaSomaticWorkflow.py \\
--tumor $cram_tumor \\
--normal $cram_normal \\
--tumor $input_tumor \\
--normal $input_normal \\
--referenceFasta $fasta \\
$options_target_bed \\
$options_manta \\

View file

@ -21,19 +21,19 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram_normal:
- input_normal:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_normal:
- input_index_normal:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- cram_tumor:
- input_tumor:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- crai_tumor:
- input_index_tumor:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"

View file

@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS {
main:
ch_versions = Channel.empty()
SAMTOOLS_STATS ( ch_bam_bai )
SAMTOOLS_STATS ( ch_bam_bai, [] )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
SAMTOOLS_FLAGSTAT ( ch_bam_bai )

View file

@ -394,6 +394,10 @@ fgbio/fastqtobam:
- modules/fgbio/fastqtobam/**
- tests/modules/fgbio/fastqtobam/**
fgbio/groupreadsbyumi:
- modules/fgbio/groupreadsbyumi/**
- tests/modules/fgbio/groupreadsbyumi/**
fgbio/sortbam:
- modules/fgbio/sortbam/**
- tests/modules/fgbio/sortbam/**
@ -629,6 +633,10 @@ kallistobustools/ref:
- modules/kallistobustools/ref/**
- tests/modules/kallistobustools/ref/**
khmer/normalizebymedian:
- modules/khmer/normalizebymedian/**
- tests/modules/khmer/normalizebymedian/**
kleborate:
- modules/kleborate/**
- tests/modules/kleborate/**

View file

@ -9,5 +9,5 @@ workflow test_bedtools_sort {
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
]
BEDTOOLS_SORT ( input )
BEDTOOLS_SORT ( input, "testext" )
}

View file

@ -4,5 +4,5 @@
- bedtools
- bedtools/sort
files:
- path: ./output/bedtools/test_out.bed
- path: ./output/bedtools/test_out.testext
md5sum: fe4053cf4de3aebbdfc3be2efb125a74

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FGBIO_GROUPREADSBYUMI } from '../../../../modules/fgbio/groupreadsbyumi/main.nf' addParams( options: [:] )
// Runs FGBIO_GROUPREADSBYUMI on the homo_sapiens test BAM with the "Adjacency" strategy
workflow test_fgbio_groupreadsbyumi {

    strategy = "Adjacency"
    input    = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true)
    ]

    FGBIO_GROUPREADSBYUMI ( input, strategy )
}

View file

@ -0,0 +1,10 @@
- name: fgbio groupreadsbyumi test_fgbio_groupreadsbyumi
command: nextflow run tests/modules/fgbio/groupreadsbyumi -entry test_fgbio_groupreadsbyumi -c tests/config/nextflow.config
tags:
- fgbio
- fgbio/groupreadsbyumi
files:
- path: output/fgbio/test_umi-grouped.bam
md5sum: f1e53fc845fd99a3da172eb8063dff0b
- path: output/fgbio/test_umi_histogram.txt
md5sum: d17fd167b2a765d46e4b01bf08ece01b

View file

@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals {
GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
}
// Test workflow for GATK4_APPLYBQSR with CRAM input.
// The input tuple carries the meta map, the 'test_paired_end_sorted_cram'
// file, its 'test_paired_end_sorted_cram_crai' index and the
// 'test_baserecalibrator_table' file. The reference FASTA, FAI, dict and the
// 'genome_bed' intervals file are passed as separate arguments.
workflow test_gatk4_applybqsr_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
}

View file

@ -1,17 +1,26 @@
- name: gatk4 applybqsr test_gatk4_applybqsr
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/applybqsr
- gatk4
files:
- path: output/gatk4/test.bam
md5sum: dac716c394db5e83c12b44355c098ca7
md5sum: 87a2eabae2b7b41574f966612b5addae
- name: gatk4 applybqsr test_gatk4_applybqsr_intervals
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/applybqsr
- gatk4
files:
- path: output/gatk4/test.bam
md5sum: 400441dbe5344658580ba0a24ba57069
md5sum: 9c015d3c1dbd9eee793b7386f432b6aa
- name: gatk4 applybqsr test_gatk4_applybqsr_cram
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config
tags:
- gatk4/applybqsr
- gatk4
files:
- path: output/gatk4/test.bam
md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7

View file

@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator {
GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
}
// Test workflow for GATK4_BASERECALIBRATOR with CRAM input.
// The input tuple carries the meta map, the 'test_paired_end_sorted_cram'
// file, its CRAI index and the 'test_baserecalibrator_table' file.
// The fifth process argument is passed as an empty list; the dbSNP VCF
// ('dbsnp_146_hg38_vcf_gz') and its .tbi index are supplied as known sites.
// NOTE(review): the tuple has four elements here — confirm the process input
// declaration expects the recalibration table in this position.
workflow test_gatk4_baserecalibrator_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
}
workflow test_gatk4_baserecalibrator_intervals {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),

View file

@ -1,17 +1,26 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config
tags:
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/baserecalibrator
files:
- path: output/gatk4/test.table
md5sum: 35d89a3811aa31711fc9815b6b80e6ec
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config
tags:
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
files:
- path: output/gatk4/test.table
md5sum: 9ecb5f00a2229291705addc09c0ec231
@ -19,8 +28,8 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config
tags:
- gatk4/baserecalibrator
- gatk4
- gatk4/baserecalibrator
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7

View file

@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller {
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict )
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
}
// Test workflow for GATK4_HAPLOTYPECALLER with CRAM input.
// The input tuple carries the meta map, the 'test_paired_end_sorted_cram'
// file and its CRAI index. The reference FASTA, FAI and dict follow, and the
// three trailing process arguments are passed as empty lists.
workflow test_gatk4_haplotypecaller_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
}
// Test workflow for GATK4_HAPLOTYPECALLER with CRAM input plus dbSNP sites
// and an intervals file.
// The three trailing process arguments are, in order: the dbSNP VCF
// ('dbsnp_146_hg38_vcf_gz'), its .tbi index, and the 'genome_bed' intervals.
workflow test_gatk4_haplotypecaller_intervals_dbsnp {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals )
}

View file

@ -1,13 +1,26 @@
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config
tags:
- gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c tests/config/nextflow.config
tags:
- gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
should_exist: true
contains:
- 'MT192765.1'
- '54.60'
- '37.32'
- path: output/gatk4/test.vcf.gz.tbi

View file

@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates {
GATK4_MARKDUPLICATES ( input )
}
// Test workflow for GATK4_MARKDUPLICATES with more than one BAM.
// The input holds the meta map followed by two files
// ('test_paired_end_sorted_bam' and 'test2_paired_end_sorted_bam').
workflow test_gatk4_markduplicates_multiple_bams {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
]
GATK4_MARKDUPLICATES ( input )
}

View file

@ -1,8 +1,23 @@
- name: gatk4 markduplicates test_gatk4_markduplicates
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/markduplicates
- gatk4
files:
- path: output/gatk4/test.bai
md5sum: e9c125e82553209933883b4fe2b8d7c2
- path: output/gatk4/test.bam
md5sum: 3b6facab3afbacfa08a7a975efbd2c6b
md5sum: bda9a7bf5057f2288ed70be3eb8a753f
- path: output/gatk4/test.metrics
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config
tags:
- gatk4/markduplicates
- gatk4
files:
- path: output/gatk4/test.bai
md5sum: 93cebe29e7cca2064262b739235cca9b
- path: output/gatk4/test.bam
md5sum: dcd6f584006b04141fb787001a8ecacc
- path: output/gatk4/test.metrics

View file

@ -3,6 +3,8 @@
nextflow.enable.dsl = 2
include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [:] )
// Used to run with the mitochondria mode setting, as this increases sensitivity and allows some tumor_normal variants to be detected while the old test data is still in use. It will be removed when new test data for sarek is available.
include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [args: '--mitochondria-mode'] )
workflow test_gatk4_mutect2_tumor_normal_pair {
input = [ [ id:'test'], // meta map
@ -12,6 +14,8 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
]
run_single = false
run_pon = false
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -20,7 +24,7 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
}
workflow test_gatk4_mutect2_tumor_single {
@ -31,6 +35,8 @@ workflow test_gatk4_mutect2_tumor_single {
]
run_single = true
run_pon = false
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -39,7 +45,28 @@ workflow test_gatk4_mutect2_tumor_single {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
}
// Test workflow for GATK4_MUTECT2 with CRAM input.
// The input tuple holds the meta map, a one-element list with the
// 'test2_paired_end_recalibrated_sorted_cram' file, a one-element list with
// its CRAI index, and an empty list as the fourth element.
// Flags: run_single is true; run_pon and run_mito are false; interval_label
// is an empty list. The germline resource (gnomAD VCF + .tbi) and the panel
// of normals (Mills and 1000G indels VCF + .tbi) are both supplied.
workflow test_gatk4_mutect2_cram_input {
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)],
[]
]
run_single = true
run_pon = false
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
germline_resource_idx = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
}
workflow test_gatk4_mutect2_generate_pon {
@ -50,6 +77,8 @@ workflow test_gatk4_mutect2_generate_pon {
]
run_single = false
run_pon = true
run_mito = false
interval_label = []
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -58,5 +87,27 @@ workflow test_gatk4_mutect2_generate_pon {
panel_of_normals = []
panel_of_normals_idx = []
GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
}
// Test workflow for GATK4_MUTECT2 in mitochondria mode.
// Mitochondria mode would ideally be tested with mitochondrial test data.
// Because the mitochondria settings only increase detection sensitivity, the
// chr22 data is used as a stand-in, as it is already a small dataset. The
// extra variants detected compared to generate_pon show that the mode is working.
// Flags: run_mito is true; run_single and run_pon are false; interval_label
// is 'chr22'. No germline resource or panel of normals is supplied (all four
// are empty lists).
workflow test_gatk4_mutect2_mitochondria {
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
[]
]
run_single = false
run_pon = false
run_mito = true
interval_label = 'chr22'
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
germline_resource = []
germline_resource_idx = []
panel_of_normals = []
panel_of_normals_idx = []
GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
}

View file

@ -7,7 +7,7 @@
- path: output/gatk4/test.f1r2.tar.gz
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: 6ecb874e6a95aa48233587b876c2a7a9
md5sum: 887d54e393510f1d0aa2c33bc6155161
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_tumor_single
@ -18,7 +18,18 @@
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: e7ef613f7d158b8a0adf44abe5db2029
md5sum: 106c5828b02b906c97922618b6072169
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_cram_input
command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/mutect2
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: 106c5828b02b906c97922618b6072169
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_generate_pon
@ -31,3 +42,14 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: 4f77301a125913170b8e9e7828b4ca3f
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria
command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/mutect2
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: fc6ea14ca2da346babe78161beea28c9
- path: output/gatk4/test.vcf.gz.tbi

View file

@ -0,0 +1,85 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQTK_MERGEPE } from '../../../../modules/seqtk/mergepe/main.nf' addParams( options: [:] )
include { KHMER_NORMALIZEBYMEDIAN } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [:] )
include { KHMER_NORMALIZEBYMEDIAN as KHMER_NORMALIZEBYMEDIAN_ARGS } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [args: '-C 20 -k 32'] )
// Test workflow for KHMER_NORMALIZEBYMEDIAN with paired-end input only.
// The paired FASTQ files are first passed through SEQTK_MERGEPE; element [1]
// of each emitted 'reads' item is collected and given as the first argument.
// The second argument is an empty list and the third is the string
// 'only_pe' (presumably the output name — the test YAML expects
// only_pe.fastq.gz).
workflow test_khmer_normalizebymedian_only_pe {
pe_reads = [
[ id:'khmer_test', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
SEQTK_MERGEPE(pe_reads)
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, [], 'only_pe' )
}
// Test workflow for KHMER_NORMALIZEBYMEDIAN with only the second (se_reads)
// argument populated.
// The first argument is an empty list; the two FASTQ files are passed as a
// plain list in the second argument; the third argument is 'only_se'.
workflow test_khmer_normalizebymedian_only_se {
se_reads = [
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
KHMER_NORMALIZEBYMEDIAN ( [], se_reads, 'only_se' )
}
// Test workflow for KHMER_NORMALIZEBYMEDIAN with both arguments populated.
// The paired FASTQ files go through SEQTK_MERGEPE and element [1] of each
// emitted 'reads' item is mapped out for the first argument. A single FASTQ
// file is passed as se_reads and the third argument is 'mixed'.
// Note: this workflow uses .map where the other khmer tests use .collect.
workflow test_khmer_normalizebymedian_mixed {
pe_reads = [
[ id:'khmer_test', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
SEQTK_MERGEPE(pe_reads)
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.map { it[1] }, se_reads, 'mixed' )
}
// Test workflow for KHMER_NORMALIZEBYMEDIAN intended to cover multiple
// paired-end samples plus a single-end file.
// pe_reads lists two meta maps ('khmer_test0', 'khmer_test1'), each followed
// by the same pair of FASTQ files. The third process argument is
// 'multiple_pe'.
// NOTE(review): pe_reads is one flat four-element list rather than two
// separate [ meta, reads ] tuples — confirm SEQTK_MERGEPE receives the
// intended input.
workflow test_khmer_normalizebymedian_multiple_pe {
pe_reads = [
[ id:'khmer_test0', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
],
[ id:'khmer_test1', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
SEQTK_MERGEPE(pe_reads)
KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'multiple_pe' )
}
// Test workflow for the KHMER_NORMALIZEBYMEDIAN_ARGS alias, which is included
// with options args '-C 20 -k 32'.
// The paired FASTQ files go through SEQTK_MERGEPE first; a single FASTQ file
// is passed as se_reads and the third argument is 'args'.
workflow test_khmer_normalizebymedian_args {
pe_reads = [
[ id:'khmer_test0', single_end:false ], // meta map
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
SEQTK_MERGEPE(pe_reads)
KHMER_NORMALIZEBYMEDIAN_ARGS ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'args' )
}

View file

@ -0,0 +1,42 @@
# nf-core modules create-test-yml khmer/normalizebymedian
- name: khmer normalizebymedian only pe reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_pe -c tests/config/nextflow.config
tags:
- khmer
- khmer/normalizebymedian
files:
- path: output/khmer/only_pe.fastq.gz
# md5sum not stable even locally with docker (gzip done by tool)
#md5sum: 75e05f2e80cf4bd0b534d4b73f7c059c
- name: khmer normalizebymedian only se reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_se -c tests/config/nextflow.config
tags:
- khmer
- khmer/normalizebymedian
files:
- path: output/khmer/only_se.fastq.gz
- name: khmer normalizebymedian mixed reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_mixed -c tests/config/nextflow.config
tags:
- khmer
- khmer/normalizebymedian
files:
- path: output/khmer/mixed.fastq.gz
- name: khmer normalizebymedian multiple pe reads
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_multiple_pe -c tests/config/nextflow.config
tags:
- khmer
- khmer/normalizebymedian
files:
- path: output/khmer/multiple_pe.fastq.gz
- name: khmer normalizebymedian args
command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_args -c tests/config/nextflow.config
tags:
- khmer
- khmer/normalizebymedian
files:
- path: output/khmer/args.fastq.gz

View file

@ -11,5 +11,15 @@ workflow test_samtools_merge {
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
]
SAMTOOLS_MERGE ( input )
SAMTOOLS_MERGE ( input, [] )
}
// Test workflow for SAMTOOLS_MERGE with CRAM input.
// The input tuple holds the meta map and a list of two CRAM files
// ('test_paired_end_recalibrated_sorted_cram' and the matching 'test2' file).
// The 'genome_fasta' reference is passed as the second process argument.
workflow test_samtools_merge_cram {
input = [ [ id: 'test' ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_MERGE ( input, fasta )
}

View file

@ -1,7 +1,15 @@
- name: samtools merge
command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
- name: samtools merge test_samtools_merge
command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
tags:
- samtools
- samtools/merge
- samtools
files:
- path: output/samtools/test_merged.bam
- name: samtools merge test_samtools_merge_cram
command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config
tags:
- samtools/merge
- samtools
files:
- path: output/samtools/test_merged.cram

View file

@ -10,5 +10,15 @@ workflow test_samtools_stats {
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
]
SAMTOOLS_STATS ( input )
SAMTOOLS_STATS ( input, [])
}
// Test workflow for SAMTOOLS_STATS with CRAM input.
// The input tuple holds the meta map, the
// 'test_paired_end_recalibrated_sorted_cram' file and its CRAI index.
// The 'genome_fasta' reference is passed as the second process argument.
workflow test_samtools_stats_cram {
input = [ [ id: 'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_STATS ( input, fasta )
}

View file

@ -1,8 +1,17 @@
- name: samtools stats
command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
- name: samtools stats test_samtools_stats
command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
tags:
- samtools
- samtools/stats
files:
- path: ./output/samtools/test.paired_end.sorted.bam.stats
- path: output/samtools/test.paired_end.sorted.bam.stats
md5sum: a7f36cf11fd3bf97e0a0ae29c0627296
- name: samtools stats test_samtools_stats_cram
command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config
tags:
- samtools
- samtools/stats
files:
- path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats
md5sum: bd55a1da30028403f4b66dacf7a2a20e

View file

@ -7,8 +7,17 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa
workflow test_samtools_view {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
]
SAMTOOLS_VIEW ( input )
SAMTOOLS_VIEW ( input, [] )
}
// Test workflow for SAMTOOLS_VIEW with CRAM input.
// The input tuple holds the meta map, the
// 'test_paired_end_recalibrated_sorted_cram' file and its CRAI index.
// The 'genome_fasta' reference is passed as the second process argument.
workflow test_samtools_view_cram {
input = [ [ id: 'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
SAMTOOLS_VIEW ( input, fasta )
}

View file

@ -1,8 +1,16 @@
- name: samtools view
- name: samtools view test_samtools_view
command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config
tags:
- samtools
- samtools/view
- samtools
files:
- path: output/samtools/test.bam
md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13
- name: samtools view test_samtools_view_cram
command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config
tags:
- samtools/view
- samtools
files:
- path: output/samtools/test.cram