Merge branch 'master' into haplocheck

Taniguti 2022-06-10 20:39:33 -03:00 committed by GitHub
commit 547f483b74
50 changed files with 1023 additions and 66 deletions

modules/ampir/main.nf (new file, 48 lines)

@ -0,0 +1,48 @@
process AMPIR {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
'quay.io/biocontainers/r-ampir:1.1.0' }"
input:
tuple val(meta), path(faa)
val model
val min_length
val min_probability
output:
tuple val(meta), path("*.faa"), emit: amps_faa
tuple val(meta), path("*.tsv"), emit: amps_tsv
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified
if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
"""
#!/usr/bin/env Rscript
library(ampir)
input_seqs <- read_faa('${faa}')
prediction <- predict_amps(input_seqs,${min_length} model = '${model}')
prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
output_seqs <- input_seqs[row.names(prediction), ]
write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
df_to_faa(output_seqs, "${prefix}.faa")
version_file_path <- "versions.yml"
version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
f <- file(version_file_path, "w")
writeLines('"${task.process}":', f)
writeLines(" ampir: ", f, sep = "")
writeLines(version_ampir, f)
close(f)
"""
}
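For reference, a sketch of how the templated predict_amps() call above resolves inside the generated R script (hypothetical values: model = 'precursor', min_length = 50; when min_length is passed as an empty list the placeholder resolves to an empty string and ampir's built-in default minimum length applies):

# with min_length = 50 the optional argument is injected before 'model'
prediction <- predict_amps(input_seqs, min_len = as.integer(50), model = 'precursor')
# with min_length = [] the argument is omitted entirely
prediction <- predict_amps(input_seqs, model = 'precursor')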

modules/ampir/meta.yml (new file, 59 lines)

@ -0,0 +1,59 @@
name: "ampir"
description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from a protein dataset of any size. ampir uses a supervised statistical machine learning approach to predict AMPs.
keywords:
- ampir
- amp
- antimicrobial peptide prediction
tools:
- "ampir":
description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
homepage: "https://github.com/Legana/ampir"
documentation: "https://cran.r-project.org/web/packages/ampir/index.html"
tool_dev_url: "https://github.com/Legana/ampir"
doi: "10.1093/bioinformatics/btaa653"
licence: ["GPL v2"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- faa:
type: file
description: FASTA file containing amino acid sequences
pattern: "*.{faa,fasta}"
- model:
type: value
description: Built-in model for AMP prediction
pattern: "{precursor,mature}"
- min_length:
type: value
description: Minimum protein length for which predictions will be generated
pattern: "[0-9]+"
- min_probability:
type: value
description: Minimum predicted probability for a peptide to be reported as an AMP
pattern: "[0-9][0-9]"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- amps_faa:
type: file
description: File containing AMP predictions in amino acid FASTA format
pattern: "*.{faa}"
- amps_tsv:
type: file
description: File containing AMP predictions in TSV format
pattern: "*.tsv"
authors:
- "@jasmezz"


@ -1,4 +1,4 @@
-# Dockerfile to create container with Cell Ranger v6.1.2
+# Dockerfile to create container with Cell Ranger v7.0.0
 # Push to nfcore/cellranger:<VER>
 FROM continuumio/miniconda3:4.8.2


@ -5,7 +5,7 @@ process CELLRANGER_COUNT {
 if (params.enable_conda) {
 exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
 }
-container "nfcore/cellranger:6.1.2"
+container "nfcore/cellranger:7.0.0"
 input:
 tuple val(meta), path(reads)


@ -1,4 +1,4 @@
-# Dockerfile to create container with Cell Ranger v6.1.2 and bcl2fastq v2.20.0
+# Dockerfile to create container with Cell Ranger v7.0.0 and bcl2fastq v2.20.0
 # Push to nfcore/cellrangermkfastq:<VER>
 FROM continuumio/miniconda3:4.8.2
@ -17,7 +17,7 @ RUN apt-get update --allow-releaseinfo-change \
 # Copy pre-downloaded bcl2fastq2 and cellranger file
 ENV BCL2FASTQ2_VER=v2-20-0-linux-x86-64 \
-CELLRANGER_VER=6.1.2
+CELLRANGER_VER=7.0.0
 COPY bcl2fastq2-$BCL2FASTQ2_VER.zip /tmp/bcl2fastq2-$BCL2FASTQ2_VER.zip
 COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz


@ -5,7 +5,7 @@ process CELLRANGER_MKFASTQ {
 if (params.enable_conda) {
 exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
 }
-container "nfcore/cellrangermkfastq:6.1.2"
+container "nfcore/cellrangermkfastq:7.0.0"
 input:
 path bcl


@ -5,7 +5,7 @@ process CELLRANGER_MKGTF {
 if (params.enable_conda) {
 exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
 }
-container "nfcore/cellranger:6.1.2"
+container "nfcore/cellranger:7.0.0"
 input:
 path gtf


@ -5,7 +5,7 @@ process CELLRANGER_MKREF {
 if (params.enable_conda) {
 exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
 }
-container "nfcore/cellranger:6.1.2"
+container "nfcore/cellranger:7.0.0"
 input:
 path fasta


@ -13,6 +13,7 @@ process ENSEMBLVEP {
 val species
 val cache_version
 path cache
+path fasta
 path extra_files
 output:
@ -27,6 +28,8 @ process ENSEMBLVEP {
 def args = task.ext.args ?: ''
 def prefix = task.ext.prefix ?: "${meta.id}"
 def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
+def reference = fasta ? "--fasta $fasta" : ""
 """
 mkdir $prefix
@ -34,6 +37,7 @ process ENSEMBLVEP {
 -i $vcf \\
 -o ${prefix}.ann.vcf \\
 $args \\
+$reference \\
 --assembly $genome \\
 --species $species \\
 --cache \\


@ -36,6 +36,11 @@ input:
 type: file
 description: |
 path to VEP cache (optional)
+- fasta:
+type: file
+description: |
+reference FASTA file (optional)
+pattern: "*.{fasta,fa}"
 - extra_files:
 type: tuple
 description: |


@ -0,0 +1,54 @@
process GATK_INDELREALIGNER {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
input:
tuple val(meta), path(bam), path(bai), path(intervals)
path(fasta)
path(fai)
path(dict)
path(known_vcf)
output:
tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def known = known_vcf ? "-known ${known_vcf}" : ""
if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk3 \\
-Xmx${avail_mem}g \\
-T IndelRealigner \\
-R ${fasta} \\
-I ${bam} \\
--targetIntervals ${intervals} \\
${known} \\
-o ${prefix}.bam \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk: \$(echo \$(gatk3 --version))
END_VERSIONS
"""
}


@ -0,0 +1,71 @@
name: "gatk_indelrealigner"
description: Performs local realignment around indels to correct for mapping errors
keywords:
- bam
- vcf
- variant calling
- indel
- realignment
tools:
- "gatk":
description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
homepage: "https://gatk.broadinstitute.org/hc/en-us"
documentation: "https://github.com/broadinstitute/gatk-docs"
licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted and indexed BAM file
pattern: "*.bam"
- bai:
type: file
description: BAM index file
pattern: "*.bai"
- intervals:
type: file
description: Intervals file created by gatk3 RealignerTargetCreator
pattern: "*.{intervals,list}"
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: ".{fasta,fa,fna}"
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: ".fai"
- dict:
type: file
description: GATK dict file for reference
pattern: ".dict"
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
pattern: ".vcf"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Sorted and indexed BAM file with local realignment around variants
pattern: "*.bam"
- bai:
type: file
description: Output BAM Index file
pattern: "*.bai"
authors:
- "@jfy133"


@ -0,0 +1,53 @@
process GATK_REALIGNERTARGETCREATOR {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
input:
tuple val(meta), path(input), path(index)
path fasta
path fai
path dict
path known_vcf
output:
tuple val(meta), path("*.intervals"), emit: intervals
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def known = known_vcf ? "-known ${known_vcf}" : ""
if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk3 \\
-Xmx${avail_mem}g \\
-T RealignerTargetCreator \\
-nt ${task.cpus} \\
-I ${input} \\
-R ${fasta} \\
-o ${prefix}.intervals \\
${known} \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk: \$(echo \$(gatk3 --version))
END_VERSIONS
"""
}


@ -0,0 +1,64 @@
name: "gatk_realignertargetcreator"
description: Generates a list of locations that should be considered for local realignment prior to genotyping.
keywords:
- bam
- vcf
- variant calling
- indel
- realignment
- targets
tools:
- "gatk":
description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
homepage: "https://gatk.broadinstitute.org/hc/en-us"
documentation: "https://github.com/broadinstitute/gatk-docs"
licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: Sorted and indexed BAM/CRAM/SAM file
pattern: "*.bam"
- index:
type: file
description: BAM index file
pattern: "*.bai"
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: ".{fasta,fa,fna}"
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: ".fai"
- dict:
type: file
description: GATK dict file for reference
pattern: ".dict"
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
pattern: ".vcf"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- intervals:
type: file
description: File containing intervals that represent sites of extant and potential indels.
pattern: "*.intervals"
authors:
- "@jfy133"


@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_low' label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input: input:
tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)


@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_low' label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input: input:
tuple val(meta), path(input), path(input_index), path(intervals) tuple val(meta), path(input), path(input_index), path(intervals)


@ -2,10 +2,8 @@ process GATK4_MARKDUPLICATES_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_high' label 'process_high'
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
'broadinstitute/gatk:4.2.3.0' }"
input: input:
tuple val(meta), path(bam) tuple val(meta), path(bam)
@ -15,6 +13,7 @@ process GATK4_MARKDUPLICATES_SPARK {
output: output:
tuple val(meta), path("${prefix}"), emit: output tuple val(meta), path("${prefix}"), emit: output
tuple val(meta), path("*.metrics"), emit: metrics, optional: true
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -25,6 +24,7 @@ process GATK4_MARKDUPLICATES_SPARK {
prefix = task.ext.prefix ?: "${meta.id}" prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--input $it"}.join(' ') def input_list = bam.collect{"--input $it"}.join(' ')
def avail_mem = 3 def avail_mem = 3
if (!task.memory) { if (!task.memory) {
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,8 +32,6 @@ process GATK4_MARKDUPLICATES_SPARK {
avail_mem = task.memory.giga avail_mem = task.memory.giga
} }
""" """
export SPARK_USER=spark3
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
$input_list \\ $input_list \\
--output $prefix \\ --output $prefix \\
@ -45,6 +43,7 @@ process GATK4_MARKDUPLICATES_SPARK {
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
END_VERSIONS END_VERSIONS
""" """
} }


@ -58,3 +58,4 @@ authors:
- "@ajodeh-juma" - "@ajodeh-juma"
- "@FriederikeHanssen" - "@FriederikeHanssen"
- "@maxulysse" - "@maxulysse"
- "@SusiJo"


@ -0,0 +1,52 @@
process GATK4_REBLOCKGVCF {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input:
tuple val(meta), path(gvcf), path(tbi), path(intervals)
path fasta
path fai
path dict
path dbsnp
path dbsnp_tbi
output:
tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: vcf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
def interval_command = intervals ? "--intervals $intervals" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\
--variant $gvcf \\
--output ${prefix}.rb.g.vcf.gz \\
--reference $fasta \\
$dbsnp_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
END_VERSIONS
"""
}


@ -0,0 +1,74 @@
name: "gatk4_reblockgvcf"
description: Condenses homRef blocks in a single-sample GVCF
keywords:
- gatk4
- reblockgvcf
- gvcf
tools:
- gatk4:
description: |
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
doi: 10.1158/1538-7445.AM2017-3590
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- gvcf:
type: file
description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode
pattern: "*.{vcf,gvcf}.gz"
- tbi:
type: file
description: Index of the GVCF file
pattern: "*.tbi"
- intervals:
type: file
description: Bed file with the genomic regions included in the library (optional)
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: Index of reference fasta file
pattern: "fasta.fai"
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- dbsnp:
type: file
description: VCF file containing known sites (optional)
- dbsnp_tbi:
type: file
description: VCF index of dbsnp (optional)
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- gvcf:
type: file
description: Filtered GVCF
pattern: "*rb.g.vcf.gz"
- tbi:
type: file
description: Index of the filtered GVCF
pattern: "*rb.g.vcf.gz.tbi"
authors:
- "@nvnieuwk"


@ -0,0 +1,40 @@
process SEXDETERRMINE {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::sexdeterrmine=1.1.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/sexdeterrmine:1.1.2--hdfd78af_1':
'quay.io/biocontainers/sexdeterrmine:1.1.2--hdfd78af_1' }"
input:
tuple val(meta), path(depth)
path sample_list_file
output:
tuple val(meta), path("*.json"), emit: json
tuple val(meta), path("*.tsv") , emit: tsv
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def sample_list = sample_list_file ? "-f ${sample_list_file}" : ''
if ("$depth" == "${prefix}.tsv") error "Input depth and output TSV names are the same, set prefix in module configuration to disambiguate!"
"""
sexdeterrmine \\
-I $depth \\
$sample_list \\
$args \\
> ${prefix}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sexdeterrmine: \$(echo \$(sexdeterrmine --version 2>&1))
END_VERSIONS
"""
}


@ -0,0 +1,48 @@
name: "sexdeterrmine"
description: Calculate the relative coverage on the Gonosomes vs Autosomes from the output of samtools depth, with error bars.
keywords:
- sex determination
- genetic sex
- relative coverage
- ancient dna
tools:
- "sexdeterrmine":
description: "A python script carry out calculate the relative coverage of X and Y chromosomes, and their associated error bars, out of capture data."
homepage: "https://github.com/TCLamnidis/Sex.DetERRmine"
documentation: "https://github.com/TCLamnidis/Sex.DetERRmine/README.md"
tool_dev_url: "https://github.com/TCLamnidis/Sex.DetERRmine"
doi: "https://doi.org/10.1038/s41467-018-07483-5"
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- depth:
type: file
description: Output from samtools depth (with header)
pattern: "*"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- json:
type: file
description: JSON formatted table of relative coverages on the X and Y, with associated error bars.
pattern: "*.json"
- tsv:
type: file
description: TSV table of relative coverages on the X and Y, with associated error bars.
pattern: "*.tsv"
authors:
- "@TCLamnidis"


@ -26,6 +26,10 @@ allelecounter:
 - modules/allelecounter/**
 - tests/modules/allelecounter/**
+ampir:
+- modules/ampir/**
+- tests/modules/ampir/**
 amplify/predict:
 - modules/amplify/predict/**
 - tests/modules/amplify/predict/**
@ -715,6 +719,14 @ gamma/gamma:
 - modules/gamma/gamma/**
 - tests/modules/gamma/gamma/**
+gatk/indelrealigner:
+- modules/gatk/indelrealigner/**
+- tests/modules/gatk/indelrealigner/**
+gatk/realignertargetcreator:
+- modules/gatk/realignertargetcreator/**
+- tests/modules/gatk/realignertargetcreator/**
 gatk/unifiedgenotyper:
 - modules/gatk/unifiedgenotyper/**
 - tests/modules/gatk/unifiedgenotyper/**
@ -851,6 +863,10 @ gatk4/mutect2:
 - modules/gatk4/mutect2/**
 - tests/modules/gatk4/mutect2/**
+gatk4/reblockgvcf:
+- modules/gatk4/reblockgvcf/**
+- tests/modules/gatk4/reblockgvcf/**
 gatk4/revertsam:
 - modules/gatk4/revertsam/**
 - tests/modules/gatk4/revertsam/**
@ -1843,6 +1859,10 @ seqwish/induce:
 - modules/seqwish/induce/**
 - tests/modules/seqwish/induce/**
+sexdeterrmine:
+- modules/sexdeterrmine/**
+- tests/modules/sexdeterrmine/**
 shasum:
 - modules/shasum/**
 - tests/modules/shasum/**


@ -232,10 +232,11 @@ params {
 test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam"
 test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam"
 mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam"
 mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai"
+test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam"
 test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram"
 test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai"
 test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram"


@ -0,0 +1,20 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { AMPIR } from '../../../modules/ampir/main.nf'
workflow test_ampir {
fasta = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true),
]
model = "precursor"
min_length = []
min_probability = "0.7"
AMPIR ( fasta, model, min_length, min_probability )
}


@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}


@ -0,0 +1,11 @@
- name: ampir test_ampir
command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config -c ./tests/modules/ampir/nextflow.config
tags:
- ampir
files:
- path: output/ampir/test.tsv
contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]
- path: output/ampir/test.faa
md5sum: 0435609144022c55ac196db053f0df89
- path: output/ampir/versions.yml
md5sum: 4a11d25b8a904a7ffb34ae88f6826888


@ -1,19 +1,65 @@
 - name: cellranger count test_cellranger_count
-command: nextflow run tests/modules/cellranger/count -entry test_cellranger_count -c tests/config/nextflow.config -c tests/modules/cellranger/count/nextflow.config
+command: nextflow run ./tests/modules/cellranger/count -entry test_cellranger_count -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/count/nextflow.config
 tags:
-- cellranger
 - cellranger/count
+- cellranger
 files:
+- path: output/cellranger/genome.filtered.gtf
+md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b
+- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
+md5sum: f315020d899597c1b57e5fe9f60f4c3e
+- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai
+md5sum: 3520cd30e1b100e55f578db9c855f685
+- path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
+md5sum: d1e05cd46684fa26d852b6bc9f05e31f
+- path: output/cellranger/homo_sapiens_chr22_reference/reference.json
+md5sum: 8405fd7f527a944eafb9c2909045840b
+- path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
+md5sum: 897cec2d191945335f8b320438bd9135
+- path: output/cellranger/homo_sapiens_chr22_reference/star/SA
+md5sum: 7961129ac5d0e1706105be1d31c6b30c
+- path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex
+md5sum: dcceb480b30cda93fb8c63ddc339093b
+- path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt
+md5sum: c81f40f27e72606d7d07097c1d56a5b5
+- path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt
+md5sum: 5ae68a67b70976ee95342a7451cb5af1
+- path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt
+md5sum: b190587cae0531f3cf25552d8aa674db
+- path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt
+md5sum: bc73df776dd3d5bb9cfcbcba60880519
+- path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab
+md5sum: 9129691eeb4ed0d02b17be879fa3edb0
+- path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab
+md5sum: 209b82f0683efd03e17d2c729676554f
+- path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab
+md5sum: 02a8f4575bdfcd4a42b4d8d07f2e9369
+- path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt
+- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbInfo.txt
+md5sum: 1082ab459363b3f2f7aabcef0979c1ed
+- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.fromGTF.out.tab
+- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.out.tab
+- path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab
+md5sum: cedcb5f4e7d97bc548cd5daa022e092c
 - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5
-md5sum: f8b6b7cc8248151a98c46d4ebec450c6
+- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/barcodes.tsv.gz
+- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/features.tsv.gz
+- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/matrix.mtx.gz
 - path: output/cellranger/sample-123/outs/metrics_summary.csv
 md5sum: 707df0f101d479d93f412ca74f9c4131
 - path: output/cellranger/sample-123/outs/molecule_info.h5
-md5sum: 0e56836ef0725f2ab05f56ca5a71e55b
+md5sum: a13bd7425f441c8d0eac8ffc50082996
 - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam
 md5sum: 15441da9cfceea0bb48c8b66b1b860df
 - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai
 md5sum: 7c3d49c77016a09535aff61a027f750c
+- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix
 - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5
-md5sum: cdad1cd7b215d7137cf92515e81a8525
+md5sum: a5290f3e300a4070f3d68a0c2e215f54
+- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/barcodes.tsv.gz
+md5sum: 5cc39ef0c7ac85f2b758b164aabf9157
+- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/features.tsv.gz
+md5sum: 07d497c7ce3e22f374af7b2cf9b97d72
+- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/matrix.mtx.gz
+md5sum: bdce94a51f16e22d40301724080b76ee
 - path: output/cellranger/sample-123/outs/web_summary.html


@ -5,7 +5,6 @@
 - cellranger/mkfastq
 files:
 - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
-md5sum: d41d8cd98f00b204e9800998ecf8427e
 - name: cellranger mkfastq test_cellranger_mkfastq_illumina
 command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config -stub-run
 tags:
@ -13,4 +12,3 @
 - cellranger/mkfastq
 files:
 - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
-md5sum: d41d8cd98f00b204e9800998ecf8427e


@ -1,8 +1,8 @@
 - name: cellranger mkgtf test_cellranger_mkgtf
-command: nextflow run tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c tests/config/nextflow.config -c tests/modules/cellranger/mkgtf/nextflow.config
+command: nextflow run ./tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkgtf/nextflow.config
 tags:
-- cellranger
 - cellranger/mkgtf
+- cellranger
 files:
 - path: output/cellranger/genome.filtered.gtf
 md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b


@ -1,8 +1,8 @@
 - name: cellranger mkref test_cellranger_mkref
 command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkref/nextflow.config
 tags:
-- cellranger
 - cellranger/mkref
+- cellranger
 files:
 - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
 md5sum: f315020d899597c1b57e5fe9f60f4c3e
@ -11,7 +11,7 @
 - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
 md5sum: 6d9b5f409bfea95022bc25b9590e194e
 - path: output/cellranger/homo_sapiens_chr22_reference/reference.json
-md5sum: 5d8d1669cd251433505f183e1c9ed6bc
+md5sum: 6cc817f0923062e780e6573806840cea
 - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
 md5sum: 22102926fadf5890e905ca71b2da3f35
 - path: output/cellranger/homo_sapiens_chr22_reference/star/SA


@ -4,11 +4,22 @@ nextflow.enable.dsl = 2
 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf'
-workflow test_ensemblvep {
+workflow test_ensemblvep_fasta {
 input = [
 [ id:'test' ], // meta map
 file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
 ]
-ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] )
+fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] )
+}
+workflow test_ensemblvep_no_fasta {
+input = [
+[ id:'test' ], // meta map
+file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+]
+ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] )
 }


@ -1,5 +1,13 @@
-- name: ensemblvep test_ensemblvep
-command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
+- name: ensemblvep test_ensemblvep_fasta
+command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
+tags:
+- ensemblvep
+files:
+- path: output/ensemblvep/test.ann.vcf
+- path: output/ensemblvep/test.summary.html
+- name: ensemblvep test_ensemblvep_no_fasta
+command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
 tags:
 - ensemblvep
 files:


@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf'
workflow test_gatk_indelrealigner {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
input_realignertargetcreator = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] )
ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals
ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
])
ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input")
GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] )
}


@ -0,0 +1,6 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
ext.prefix = { "${meta.id}.realigned" }
}


@ -0,0 +1,12 @@
- name: gatk indelrealigner test_gatk_indelrealigner
command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config
tags:
- gatk/indelrealigner
- gatk
files:
- path: output/gatk/test.realigned.bai
md5sum: 85a67df8827fe426e7f3a458134c0551
- path: output/gatk/test.realigned.bam
md5sum: ea1df6f7fcafc408fae4dc1574813d8a
- path: output/gatk/test.realigned.intervals
md5sum: 7aa7a1b235a510e6591e262382086bf8


@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
workflow test_gatk_realignertargetcreator {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] )
}


@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}


@ -0,0 +1,8 @@
- name: gatk realignertargetcreator test_gatk_realignertargetcreator
command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config
tags:
- gatk
- gatk/realignertargetcreator
files:
- path: output/gatk/test.intervals
md5sum: 7aa7a1b235a510e6591e262382086bf8


@ -15,7 +15,6 @@
 - gatk4/applybqsrspark
 files:
 - path: output/gatk4/test.bam
-md5sum: 2ca2446f0125890280056fd7da822732
 - path: output/gatk4/versions.yml
 - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram


@ -3,26 +3,55 @@
 nextflow.enable.dsl = 2
 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
+include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
+include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
 workflow test_gatk4_markduplicates_spark {
 input = [ [ id:'test', single_end:false ], // meta map
 file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
 ]
-fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
-fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
-dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
+fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
 GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
 }
+// chr 22
 workflow test_gatk4_markduplicates_spark_multiple_bams {
 input = [ [ id:'test', single_end:false ], // meta map
-[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
+[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
 ] ]
-fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
-fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
-dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
+fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
 GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
 }
+// chr 22
+workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
+input = [ [ id:'test', single_end:false ], // meta map
+[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
+] ]
+fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
+}
+// chr 22
+workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
+input = [ [ id:'test', single_end:false ], // meta map
+[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
+] ]
+fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
+}


@ -2,4 +2,18 @@ process {
 publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+withName: GATK4_MARKDUPLICATES_SPARK {
+ext.prefix = { "${meta.id}.bam" }
+}
+withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
+ext.prefix = { "${meta.id}.cram" }
+}
+withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
+ext.args = '--metrics-file test.metrics'
+ext.prefix = { "${meta.id}.bam" }
+}
 }
+// override tests/config/nextflow.config
+docker.userEmulation = false


@ -1,25 +1,41 @@
-- name: gatk4 markduplicates test_gatk4_markduplicates_spark
-command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
+command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
 tags:
 - gatk4
 - gatk4/markduplicatesspark
 files:
-- path: output/gatk4/test.bai
-md5sum: e9c125e82553209933883b4fe2b8d7c2
 - path: output/gatk4/test.bam
-md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
-- path: output/gatk4/test.metrics
+md5sum: dc1a09ac6371aab7c50d1a554baa06d3
 - path: output/gatk4/versions.yml
-- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
-command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
+command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
 tags:
 - gatk4
 - gatk4/markduplicatesspark
 files:
-- path: output/gatk4/test.bai
-md5sum: bad71df9c876e72a5bc0a3e0fd755f92
 - path: output/gatk4/test.bam
-md5sum: 8187febc6108ffef7f907e89b9c091a4
-- path: output/gatk4/test.metrics
+md5sum: 898cb0a6616897d8ada90bab53bf0837
+- path: output/gatk4/versions.yml
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
+command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+tags:
+- gatk4
+- gatk4/markduplicatesspark
+files:
+- path: output/gatk4/test.cram
+md5sum: 2271016de5e4199736598f39d12d7587
+- path: output/gatk4/versions.yml
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
+command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+tags:
+- gatk4
+- gatk4/markduplicatesspark
+files:
+- path: output/gatk4/test.bam
+md5sum: 898cb0a6616897d8ada90bab53bf0837
+- path: output/gatk4/test.metrics
+contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
 - path: output/gatk4/versions.yml


@ -0,0 +1,55 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf'
workflow test_gatk4_reblockgvcf {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
[]
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] )
}
workflow test_gatk4_reblockgvcf_intervals {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] )
}
workflow test_gatk4_reblockgvcf_dbsnp {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true),
[]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi )
}


@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}


@ -0,0 +1,26 @@
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi


@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMTOOLS_DEPTH } from '../../../modules/samtools/depth/main.nf'
include { SEXDETERRMINE } from '../../../modules/sexdeterrmine/main.nf'
workflow test_sexdeterrmine {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test3_single_end_markduplicates_sorted_bam'], checkIfExists: true) ]
SAMTOOLS_DEPTH ( input )
SEXDETERRMINE ( SAMTOOLS_DEPTH.out.tsv, [] )
}


@ -0,0 +1,12 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName:SAMTOOLS_DEPTH {
ext.args = "-H"
}
withName:SEXDETERRMINE {
ext.prefix = { "${meta.id}_sexdet" }
}
}


@ -0,0 +1,15 @@
- name: sexdeterrmine test_sexdeterrmine
command: nextflow run ./tests/modules/sexdeterrmine -entry test_sexdeterrmine -c ./tests/config/nextflow.config -c ./tests/modules/sexdeterrmine/nextflow.config
tags:
- sexdeterrmine
files:
- path: output/samtools/test.tsv
md5sum: fa2992ca1ea93a6e1b3e838476191935
- path: output/samtools/versions.yml
md5sum: dbd04b700335c8ad236bd667254c8dd8
- path: output/sexdeterrmine/sexdeterrmine.json
md5sum: bafb2419bb8630eda29a251c20e97166
- path: output/sexdeterrmine/test_sexdet.tsv
md5sum: 1cf8a2b97b38353eb97a96ab872dcca9
- path: output/sexdeterrmine/versions.yml
md5sum: 077361101e8e7997aec3da8a01e59eee


@ -56,5 +56,5 @@ def test_ensure_valid_version_yml(workflow_dir):
 assert len(software_versions), "There must be at least one version emitted."
 for tool, version in software_versions.items():
 assert re.match(
-r"^\d+.*", str(version)
-), f"Version number for {tool} must start with a number. "
+r"^\d.*|^[a-f0-9]{40}$", str(version)
+), f"Version number for {tool} must start with a number, or be a Git SHA commit id. "