Merge branch 'master' into update_tiddit

2024-12-22 02:58:17 +00:00 · 2022-06-08 11:16:01 +02:00 · 2022-06-08 11:16:01 +02:00 · 7230bf2174
commit 7230bf2174
parent 97e66015ca f147b32fbf
13 changed files with 409 additions and 7 deletions
--- a/modules/gatk/unifiedgenotyper/main.nf
+++ b/modules/gatk/unifiedgenotyper/main.nf
@ -0,0 +1,63 @@
+process GATK_UNIFIEDGENOTYPER { // Runs GATK3 UnifiedGenotyper on one alignment file and emits a gzipped VCF
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
+        'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
+
+    input:
+    tuple val(meta), path(input), path(index)
+    path  fasta
+    path  fai
+    path  dict
+    path  intervals      // optional: an empty value omits --intervals
+    path  contamination  // optional: an empty value omits -contaminationFile
+    path  dbsnp          // optional: an empty value omits --dbsnp
+    path  comp           // optional: an empty value omits --comp
+
+    output:
+    tuple val(meta), path("*.vcf.gz"), emit: vcf
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" // each optional flag is emitted only when its input is set
+    def dbsnp_file = dbsnp ? "--dbsnp ${dbsnp}" : ""
+    def comp_file = comp ? "--comp ${comp}" : ""
+    def intervals_file = intervals ? "--intervals ${intervals}" : ""
+
+    def avail_mem = 3 // JVM heap (GB) passed as -Xmx when task.memory is not set
+    if (!task.memory) {
+        log.info '[GATK UnifiedGenotyper] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+
+    """
+    gatk3 \\
+        -Xmx${avail_mem}g \\
+        -nt ${task.cpus} \\
+        -T UnifiedGenotyper \\
+        -I ${input} \\
+        -R ${fasta} \\
+        ${contamination_file} \\
+        ${dbsnp_file} \\
+        ${comp_file} \\
+        ${intervals_file} \\
+        -o ${prefix}.vcf \\
+        $args
+
+    gzip -n *.vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk: \$(echo \$(gatk3 --version))
+    END_VERSIONS
+    """
+}
--- a/modules/gatk/unifiedgenotyper/meta.yml
+++ b/modules/gatk/unifiedgenotyper/meta.yml
@ -0,0 +1,73 @@
+name: "gatk_unifiedgenotyper"
+keywords:
+  - bam
+  - vcf
+  - variant calling
+tools:
+  - "gatk":
+      description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
+      homepage: "https://gatk.broadinstitute.org/hc/en-us"
+      documentation: "https://github.com/broadinstitute/gatk-docs"
+      licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: Sorted and indexed BAM/CRAM/SAM file
+      pattern: "*.bam"
+  - index:
+      type: file
+      description: BAM index file
+      pattern: "*.bai"
+  - fasta:
+      type: file
+      description: Reference file used to generate BAM file
+      pattern: "*.{fasta,fa,fna}"
+  - fai:
+      type: file
+      description: Index of reference file used to generate BAM file
+      pattern: "*.fai"
+  - dict:
+      type: file
+      description: GATK dict file for reference
+      pattern: "*.dict"
+  - intervals:
+      type: file
+      description: Bed file with the genomic regions included in the library (optional)
+      pattern: "*.intervals"
+  - contamination:
+      type: file
+      description: Tab-separated file containing fraction of contamination in sequencing data (per sample) to aggressively remove
+      pattern: "*"
+  - dbsnp:
+      type: file
+      description: VCF file containing known sites (optional)
+      pattern: "*"
+  - comp:
+      type: file
+      description: Comparison VCF file (optional)
+      pattern: "*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf:
+      type: file
+      description: VCF file containing called variants
+      pattern: "*.vcf.gz"
+
+authors:
+  - "@ilight1542"
+  - "@jfy133"
--- a/modules/gatk4/calibratedragstrmodel/main.nf
+++ b/modules/gatk4/calibratedragstrmodel/main.nf
@ -1,6 +1,6 @@
 process GATK4_CALIBRATEDRAGSTRMODEL {
    tag "$meta.id"
-    label 'process_low'
+    label 'process_medium'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@ -38,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL {
        --output ${prefix}.txt \\
        --reference $fasta \\
        --str-table-path $strtablefile \\
+        --threads $task.cpus \\
        $intervals_command \\
        --tmp-dir . \\
        $args
--- a/modules/snippy/run/main.nf
+++ b/modules/snippy/run/main.nf
@ -0,0 +1,55 @@
+process SNIPPY_RUN { // Runs snippy on one sample's reads against a reference and emits every file from its output directory
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' :
+        'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }"
+
+    input:
+    tuple val(meta), path(reads) // reads[0] (and reads[1] when not single-end) are passed to snippy
+    path reference               // passed to snippy via --reference
+
+    output:
+    tuple val(meta), path("${prefix}/${prefix}.tab")              , emit: tab
+    tuple val(meta), path("${prefix}/${prefix}.csv")              , emit: csv
+    tuple val(meta), path("${prefix}/${prefix}.html")             , emit: html
+    tuple val(meta), path("${prefix}/${prefix}.vcf")              , emit: vcf
+    tuple val(meta), path("${prefix}/${prefix}.bed")              , emit: bed
+    tuple val(meta), path("${prefix}/${prefix}.gff")              , emit: gff
+    tuple val(meta), path("${prefix}/${prefix}.bam")              , emit: bam
+    tuple val(meta), path("${prefix}/${prefix}.bam.bai")          , emit: bai
+    tuple val(meta), path("${prefix}/${prefix}.log")              , emit: log
+    tuple val(meta), path("${prefix}/${prefix}.aligned.fa")       , emit: aligned_fa
+    tuple val(meta), path("${prefix}/${prefix}.consensus.fa")     , emit: consensus_fa
+    tuple val(meta), path("${prefix}/${prefix}.consensus.subs.fa"), emit: consensus_subs_fa
+    tuple val(meta), path("${prefix}/${prefix}.raw.vcf")          , emit: raw_vcf
+    tuple val(meta), path("${prefix}/${prefix}.filt.vcf")         , emit: filt_vcf
+    tuple val(meta), path("${prefix}/${prefix}.vcf.gz")           , emit: vcf_gz
+    tuple val(meta), path("${prefix}/${prefix}.vcf.gz.csi")       , emit: vcf_csi
+    tuple val(meta), path("${prefix}/${prefix}.txt")              , emit: txt
+    path "versions.yml"                                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}" // assigned without `def`; the output: paths above interpolate this same prefix
+    def read_inputs = meta.single_end ? "--se ${reads[0]}" : "--R1 ${reads[0]} --R2 ${reads[1]}" // one --se file, or an --R1/--R2 pair
+    """
+    snippy \\
+        $args \\
+        --cpus $task.cpus \\
+        --outdir $prefix \\
+        --reference $reference \\
+        --prefix $prefix \\
+        $read_inputs
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        snippy: \$(echo \$(snippy --version 2>&1) | sed 's/snippy //')
+    END_VERSIONS
+    """
+}
--- a/modules/snippy/run/meta.yml
+++ b/modules/snippy/run/meta.yml
@ -0,0 +1,110 @@
+name: snippy_run
+description: Rapid haploid variant calling
+keywords:
+  - variant
+  - fastq
+  - bacteria
+tools:
+  - snippy:
+      description: "Rapid bacterial SNP calling and core genome alignments"
+      homepage: "https://github.com/tseemann/snippy"
+      documentation: "https://github.com/tseemann/snippy"
+      tool_dev_url: "https://github.com/tseemann/snippy"
+      doi: ""
+      licence: "['GPL v2']"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+  - reference:
+      type: file
+      description: Reference genome in GenBank (preferred) or FASTA format
+      pattern: "*.{gbk,gbk.gz,fa,fa.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - tab:
+      type: file
+      description: A simple tab-separated summary of all the variants
+      pattern: "*.tab"
+  - csv:
+      type: file
+      description: A comma-separated version of the .tab file
+      pattern: "*.csv"
+  - html:
+      type: file
+      description: A HTML version of the .tab file
+      pattern: "*.html"
+  - vcf:
+      type: file
+      description: The final annotated variants in VCF format
+      pattern: "*.vcf"
+  - bed:
+      type: file
+      description: The variants in BED format
+      pattern: "*.bed"
+  - gff:
+      type: file
+      description: The variants in GFF3 format
+      pattern: "*.gff"
+  - bam:
+      type: file
+      description: The alignments in BAM format. Includes unmapped, multimapping reads. Excludes duplicates.
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: Index for the .bam file
+      pattern: "*.bam.bai"
+  - log:
+      type: file
+      description: A log file with the commands run and their outputs
+      pattern: "*.log"
+  - aligned_fa:
+      type: file
+      description: A version of the reference but with - at position with depth=0 and N for 0 < depth < --mincov (does not have variants)
+      pattern: "*.aligned.fa"
+  - consensus_fa:
+      type: file
+      description: A version of the reference genome with all variants instantiated
+      pattern: "*.consensus.fa"
+  - consensus_subs_fa:
+      type: file
+      description: A version of the reference genome with only substitution variants instantiated
+      pattern: "*.consensus.subs.fa"
+  - raw_vcf:
+      type: file
+      description: The unfiltered variant calls from Freebayes
+      pattern: "*.raw.vcf"
+  - filt_vcf:
+      type: file
+      description: The filtered variant calls from Freebayes
+      pattern: "*.filt.vcf"
+  - vcf_gz:
+      type: file
+      description: Compressed .vcf file via BGZIP
+      pattern: "*.vcf.gz"
+  - vcf_csi:
+      type: file
+      description: Index for the .vcf.gz via bcftools index
+      pattern: "*.vcf.gz.csi"
+  - txt:
+      type: file
+      description: Tab-separated columnar list of statistics
+      pattern: "*.txt"
+authors:
+  - "@rpetit3"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -715,6 +715,10 @@ gamma/gamma:
  - modules/gamma/gamma/**
  - tests/modules/gamma/gamma/**

+gatk/unifiedgenotyper:
+  - modules/gatk/unifiedgenotyper/**
+  - tests/modules/gatk/unifiedgenotyper/**
+
 gatk4/applybqsr:
  - modules/gatk4/applybqsr/**
  - tests/modules/gatk4/applybqsr/**
@ -1859,6 +1863,10 @@ snapaligner/index:
  - modules/snapaligner/index/**
  - tests/modules/snapaligner/index/**

+snippy/run:
+  - modules/snippy/run/**
+  - tests/modules/snippy/run/**
+
 snpdists:
  - modules/snpdists/**
  - tests/modules/snpdists/**
--- a/tests/modules/gatk/unifiedgenotyper/main.nf
+++ b/tests/modules/gatk/unifiedgenotyper/main.nf
@ -0,0 +1,18 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf'
+
+workflow test_gatk_unifiedgenotyper { // Runs GATK_UNIFIEDGENOTYPER on the sarscov2 sorted BAM with only the required inputs
+
+    input     = [ [ id:'test' ], // meta map
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+                ]
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [], [], [], []) // the four [] leave intervals, contamination, dbsnp and comp empty
+}
--- a/tests/modules/gatk/unifiedgenotyper/nextflow.config
+++ b/tests/modules/gatk/unifiedgenotyper/nextflow.config
@ -0,0 +1,5 @@
+process { // process-scope settings used by the gatk/unifiedgenotyper module test
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // dir = text before the first '_' of the last ':'-separated process-name part, lowercased
+    
+}
--- a/tests/modules/gatk/unifiedgenotyper/test.yml
+++ b/tests/modules/gatk/unifiedgenotyper/test.yml
@ -0,0 +1,9 @@
+- name: gatk unifiedgenotyper test_gatk_unifiedgenotyper
+  command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config  -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config
+  tags:
+    - gatk
+    - gatk/unifiedgenotyper
+  files:
+    - path: output/gatk/test.vcf.gz
+      contains:
+        - "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	test"
--- a/tests/modules/gatk4/calibratedragstrmodel/test.yml
+++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml
@ -1,26 +1,26 @@
 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam
  command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
-    - gatk4/calibratedragstrmodel
    - gatk4
+    - gatk4/calibratedragstrmodel
  files:
    - path: output/gatk4/test.txt
-      md5sum: 0a1a1583b157fa2251dd931ed165da4f
+      md5sum: e16fa32906c74bb18b93e98a86718ff1

 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram
  command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
-    - gatk4/calibratedragstrmodel
    - gatk4
+    - gatk4/calibratedragstrmodel
  files:
    - path: output/gatk4/test.txt
-      md5sum: 1aa7ab38023f724877b3323c5e6b9a4e
+      md5sum: 81c7bf338886cb4d5c2cc07fc56afe44

 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds
  command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config
  tags:
-    - gatk4/calibratedragstrmodel
    - gatk4
+    - gatk4/calibratedragstrmodel
  files:
    - path: output/gatk4/test.txt
-      md5sum: def8baccad7bd59006f08fcb0a6721bf
+      md5sum: cb6a9acdee042302b54fd1f59b5f54ee
--- a/tests/modules/snippy/run/main.nf
+++ b/tests/modules/snippy/run/main.nf
@ -0,0 +1,16 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SNIPPY_RUN } from '../../../../modules/snippy/run/main.nf'
+
+workflow test_snippy_run { // Runs SNIPPY_RUN on the sarscov2 paired-end FastQ files against the genome FASTA
+    
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+    reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    SNIPPY_RUN ( input, reference )
+}
--- a/tests/modules/snippy/run/nextflow.config
+++ b/tests/modules/snippy/run/nextflow.config
@ -0,0 +1,5 @@
+process { // process-scope settings used by the snippy/run module test
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // dir = text before the first '_' of the last ':'-separated process-name part, lowercased
+    
+}
--- a/tests/modules/snippy/run/test.yml
+++ b/tests/modules/snippy/run/test.yml
@ -0,0 +1,39 @@
+- name: snippy run test_snippy_run
+  command: |
+    nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config
+  tags:
+    - snippy/run
+    - snippy
+  files:
+    - path: output/snippy/test/test.aligned.fa
+      md5sum: 47e3390d4167edf1955d162d37aca5e3
+    - path: output/snippy/test/test.bam
+    - path: output/snippy/test/test.bam.bai
+    - path: output/snippy/test/test.bed
+    - path: output/snippy/test/test.consensus.fa
+      md5sum: 483f4a5dfe60171c86ee9b7e6dff908b
+    - path: output/snippy/test/test.consensus.subs.fa
+      md5sum: 483f4a5dfe60171c86ee9b7e6dff908b
+    - path: output/snippy/test/test.csv
+      md5sum: 322f942115e5945c2041a88246166703
+    - path: output/snippy/test/test.filt.vcf
+      contains: ["fileformat", "freebayes", "CHROM"]
+    - path: output/snippy/test/test.gff
+      md5sum: df19e1b84ba6f691d20c72b397c88abf
+    - path: output/snippy/test/test.html
+      md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938
+    - path: output/snippy/test/test.log
+      contains: ["snippy", "consensus", "subs"]
+    - path: output/snippy/test/test.raw.vcf
+      contains: ["fileformat", "freebayes", "CHROM"]
+    - path: output/snippy/test/test.tab
+      md5sum: beb9bde3bce985e53e8feba9ec5b136e
+    - path: output/snippy/test/test.txt
+      contains: ["DateTime", "ReadFiles", "VariantTotal"]
+    - path: output/snippy/test/test.vcf
+      contains: ["fileformat", "freebayes", "CHROM"]
+    - path: output/snippy/test/test.vcf.gz
+    - path: output/snippy/test/test.vcf.gz.csi
+      md5sum: bed9fa291c220a1ba04eb2d448932ffc
+    - path: output/snippy/versions.yml
+      md5sum: 518aad56c4dbefb6cbcde5ab38cf7b5d