Merge branch 'master' into mafft-module

2024-12-22 02:58:17 +00:00 · 2022-02-24 14:17:40 +00:00 · 2022-02-24 14:17:40 +00:00 · df31bfb137
commit df31bfb137
parent 2804e2e2d7 4c59984d7b
37 changed files with 1027 additions and 14 deletions
--- a/modules/faqcs/main.nf
+++ b/modules/faqcs/main.nf
@ -0,0 +1,103 @@
+process FAQCS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::faqcs=2.10" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/faqcs%3A2.10--r41h9a82719_2' :
+        'quay.io/biocontainers/faqcs:2.10--r41h9a82719_2' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path('*.trimmed.fastq.gz')           , emit: reads
+    tuple val(meta), path('*.stats.txt')                  , emit: stats
+    tuple val(meta), path('*_qc_report.pdf')              , optional:true, emit: statspdf
+    tuple val(meta), path('*.log')                        , emit: log
+    tuple val(meta), path('*.discard.fastq.gz')           , optional:true, emit: reads_fail
+    tuple val(meta), path('*.trimmed.unpaired.fastq.gz')  , optional:true, emit: reads_unpaired
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        [ ! -f  ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+        FaQCs \\
+            -d . \\
+            -u ${prefix}.fastq.gz \\
+            --prefix ${prefix} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.fastq
+            gzip ${prefix}.trimmed.fastq
+        fi
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    } else {
+        """
+        [ ! -f  ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f  ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+        FaQCs \\
+            -d . \\
+            -1 ${prefix}_1.fastq.gz \\
+            -2 ${prefix}_2.fastq.gz \\
+            --prefix ${meta.id} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        # Unpaired
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            # If it is empty remove it
+            if [[ ! -s ${prefix}.unpaired.trimmed.fastq ]]; then
+                rm ${prefix}.unpaired.trimmed.fastq
+            else
+                mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.unpaired.fastq
+                gzip ${prefix}.trimmed.unpaired.fastq
+            fi
+        fi
+
+        # R1
+        if [[ -f ${prefix}.1.trimmed.fastq ]]; then
+            mv ${prefix}.1.trimmed.fastq ${prefix}_1.trimmed.fastq
+            gzip ${prefix}_1.trimmed.fastq
+        fi
+
+        # R2
+        if [[ -f ${prefix}.2.trimmed.fastq ]]; then
+            mv ${prefix}.2.trimmed.fastq ${prefix}_2.trimmed.fastq
+            gzip ${prefix}_2.trimmed.fastq
+        fi
+
+        # Discarded: Created if --discard argument is passed
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    }
+}
+
--- a/modules/faqcs/meta.yml
+++ b/modules/faqcs/meta.yml
@ -0,0 +1,68 @@
+name: faqcs
+description: Perform adapter and quality trimming on sequencing reads with reporting
+keywords:
+  - trimming
+  - quality control
+  - fastq
+  - faqcs
+tools:
+  - faqcs:
+      description: |
+        FaQCs combines several features of currently available applications into a single, user-friendly process, and includes additional unique capabilities such as filtering the PhiX control sequences, conversion of FASTQ formats, and multi-threading. The original data and trimmed summaries are reported within a variety of graphics and reports, providing a simple way to do data quality control and assurance.
+      homepage: https://github.com/LANL-Bioinformatics/FaQCs
+      documentation: https://github.com/LANL-Bioinformatics/FaQCs
+      tool_dev_url: https://github.com/LANL-Bioinformatics/FaQCs
+      doi: "https://doi.org/10.1186/s12859-014-0366-2"
+      licence: ['GPLv3 License']
+
+## TODO nf-core: Add a description of all of the variables used as input
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: The trimmed/modified fastq reads
+      pattern: "*trimmed.fastq.gz"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing (Optional with --discard args setting)
+      pattern: "*discard.fastq.gz"
+  - reads_unpaired:
+      type: file
+      description: Reads without matching mates in paired-end files (Optional)
+      pattern: "*trimmed.unpaired.fastq.gz"
+  - stats:
+      type: file
+      description: trimming/qc text stats file
+      pattern: "*.stats.txt"
+  - statspdf:
+      type: file
+      description: trimming/qc pdf report file
+      pattern: "*_qc_report.pdf"
+  - log:
+      type: file
+      description: fastq log file
+      pattern: "*.log"
+authors:
+  - "@mjcipriano"
+  - "@sateeshperi"
+  - "@hseabolt"
--- a/modules/gatk4/combinegvcfs/main.nf
+++ b/modules/gatk4/combinegvcfs/main.nf
@ -0,0 +1,47 @@
+process GATK4_COMBINEGVCFS {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0' :
+        'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(vcf_idx)
+    path (fasta)
+    path (fasta_fai)
+    path (fasta_dict)
+
+    output:
+    tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem       = 3
+    if (!task.memory) {
+        log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+    def input_files = vcf.collect{"-V ${it}"}.join(' ') // add '-V' to each vcf file
+    """
+    gatk \\
+        --java-options "-Xmx${avail_mem}g" \\
+        CombineGVCFs \\
+        -R ${fasta} \\
+        -O ${prefix}.combined.g.vcf.gz \\
+        ${args} \\
+        ${input_files}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/gatk4/combinegvcfs/meta.yml
+++ b/modules/gatk4/combinegvcfs/meta.yml
@ -0,0 +1,54 @@
+name: gatk4_combinegvcfs
+description: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file
+keywords:
+  - gvcf
+  - gatk4
+  - vcf
+  - combinegvcfs
+  - Short_Variant_Discovery
+tools:
+  - gatk4:
+      description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs
+      tool_dev_url: https://github.com/broadinstitute/gatk
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ['Apache-2.0']
+
+input:
+  - fasta:
+      type: file
+      description: The reference fasta file
+      pattern: "*.fasta"
+  - fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+  - dict:
+      type: file
+      description: FASTA dictionary file
+      pattern: "*.{dict}"
+  - vcf:
+      type: file
+      description: Compressed VCF files
+      pattern: "*.vcf.gz"
+  - vcf_idx:
+      type: file
+      description: VCF Index file
+      pattern: "*.{fai}"
+output:
+  - gvcf:
+      type: file
+      description: Compressed Combined GVCF file
+      pattern: "*.combined.g.vcf.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@sateeshperi"
+  - "@mjcipriano"
+  - "@hseabolt"
--- a/modules/gatk4/selectvariants/main.nf
+++ b/modules/gatk4/selectvariants/main.nf
@ -0,0 +1,41 @@
+process GATK4_SELECTVARIANTS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0':
+        'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(vcf_idx)
+
+    output:
+    tuple val(meta), path("*.selectvariants.vcf.gz")       , emit: vcf
+    tuple val(meta), path("*.selectvariants.vcf.gz.tbi")   , emit: tbi
+    path "versions.yml"		                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem = 3
+    if (!task.memory) {
+        log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.toGiga()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
+        -V $vcf \\
+        -O ${prefix}.selectvariants.vcf.gz \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/gatk4/selectvariants/meta.yml
+++ b/modules/gatk4/selectvariants/meta.yml
@ -0,0 +1,55 @@
+name: gatk4_selectvariants
+description: Select a subset of variants from a VCF file
+keywords:
+  - gatk
+  - gatk4
+  - selectvariants
+  - vcf
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants
+      tool_dev_url: https://github.com/broadinstitute/gatk
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+  - vcf:
+      type: list
+      description: VCF(.gz) file
+      pattern: "*.{vcf,vcf.gz}"
+  - vcf_idx:
+      type: list
+      description: VCF file index
+      pattern: "*.{idx,tbi}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: Compressed VCF file
+      pattern: "*.selectvariants.vcf.gz"
+  - vcf_tbi:
+      type: list
+      description: VCF file index
+      pattern: "*.{idx,tbi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@mjcipriano"
--- a/modules/hmmer/hmmalign/main.nf
+++ b/modules/hmmer/hmmalign/main.nf
@ -21,13 +21,11 @@ process HMMER_HMMALIGN {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    def fastacmd = fasta.getExtension() == 'gz' ? "gunzip -c $fasta" : "cat $fasta"
    """
-    $fastacmd | \\
-        hmmalign \\
+    hmmalign \\
        $args \\
        $hmm \\
-        - | gzip -c > ${meta.id}.sthlm.gz
+        $fasta | gzip -c > ${meta.id}.sthlm.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/hmmer/hmmalign/meta.yml
+++ b/modules/hmmer/hmmalign/meta.yml
@ -19,12 +19,12 @@ input:
        e.g. [ id:'test' ]
  - fasta:
      type: file
-      description: Amino acid or nucleotide fasta file, gzipped or not
-      pattern: "*.{fna,fna.gz,faa,faa.gz,fasta,fasta.gz,fa,fa.gz}"
+      description: Amino acid or nucleotide gzipped compressed fasta file
+      pattern: "*.{fna.gz,faa.gz,fasta.gz,fa.gz}"
  - hmm:
      type: file
-      description: HMM file
-      pattern: "*.hmm"
+      description: A gzipped HMM file
+      pattern: "*.hmm.gz"

 output:
  - meta:
@ -43,3 +43,4 @@ output:

 authors:
  - "@erikrikarddaniel"
+  - "@jfy133"
--- a/modules/hmmer/hmmsearch/main.nf
+++ b/modules/hmmer/hmmsearch/main.nf
@ -0,0 +1,51 @@
+process HMMER_HMMSEARCH {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' :
+        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"
+
+    input:
+    tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain)
+
+    output:
+    tuple val(meta), path('*.txt.gz')    , emit: output
+    tuple val(meta), path('*.sto.gz')   , emit: alignments    , optional: true
+    tuple val(meta), path('*.tbl.gz')   , emit: target_summary, optional: true
+    tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    output = "${prefix}.txt"
+    alignment = write_align ? "-A ${prefix}.sto" : ''
+    target_summary = write_target ? "--tblout ${prefix}.tbl" : ''
+    domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" :  ''
+    """
+    hmmsearch \\
+        $args \\
+        --cpu $task.cpus \\
+        -o $output \\
+        $alignment \\
+        $target_summary \\
+        $domain_summary \\
+        $hmmfile \\
+        $seqdb
+
+    gzip --no-name *.txt \\
+        ${write_align ? '*.sto' : ''} \\
+        ${write_target ? '*.tbl' : ''} \\
+        ${write_domain ? '*.domtbl' : ''}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//')
+    END_VERSIONS
+    """
+}
--- a/modules/hmmer/hmmsearch/meta.yml
+++ b/modules/hmmer/hmmsearch/meta.yml
@ -0,0 +1,69 @@
+name: hmmer_hmmsearch
+description: search profile(s) against a sequence database
+keywords:
+  - hidden Markov model
+  - HMM
+  - hmmer
+  - hmmsearch
+tools:
+  - hmmer:
+      description: Biosequence analysis using profile hidden Markov models
+      homepage: http://hmmer.org/
+      documentation: http://hmmer.org/documentation.html
+      tool_dev_url: https://github.com/EddyRivasLab/hmmer
+      doi: "10.1371/journal.pcbi.1002195"
+      licence: ['BSD']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - hmmfile:
+      type: file
+      description: One or more HMM profiles created with hmmbuild
+      pattern: "*.{hmm,hmm.gz}"
+  - seqdb:
+      type: file
+      description: Database of sequences in FASTA format
+      pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}"
+  - write_align:
+      type: val
+      description: Flag to write optional alignment output. Specify with 'true' to output
+  - write_target:
+      type: val
+      description: Flag to write optional per target summary . Specify with 'true' to output
+  - write_domain:
+      type: val
+      description: Flag to write optional per domain summary. Specify with 'true' to output
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - output:
+      type: file
+      description: Human readable output summarizing hmmsearch results
+      pattern: "*.{txt.gz}"
+  - alignments:
+      type: file
+      description: Optional multiple sequence alignment (MSA) in Stockholm format
+      pattern: "*.{sto.gz}"
+  - target_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-target output
+      pattern: "*.{tbl.gz}"
+  - domain_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-domain output
+      pattern: "*.{domtbl.gz}"
+
+authors:
+  - "@Midnighter"
--- a/modules/seqkit/pair/main.nf
+++ b/modules/seqkit/pair/main.nf
@ -0,0 +1,40 @@
+process SEQKIT_PAIR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::seqkit=2.1.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0':
+        'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.paired.fastq.gz")                  , emit: reads
+    tuple val(meta), path("*.unpaired.fastq.gz"), optional: true, emit: unpaired_reads
+    path "versions.yml"                                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    seqkit \\
+        pair \\
+        -1 ${reads[0]} \\
+        -2 ${reads[1]} \\
+        $args \\
+        --threads $task.cpus
+
+    # gzip fastq
+    find . -maxdepth 1 -name "*.fastq" -exec gzip {} \;
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$( seqkit | sed '3!d; s/Version: //' )
+    END_VERSIONS
+    """
+}
--- a/modules/seqkit/pair/meta.yml
+++ b/modules/seqkit/pair/meta.yml
@ -0,0 +1,48 @@
+name: seqkit_pair
+description: match up paired-end reads from two fastq files
+keywords:
+  - seqkit
+  - pair
+tools:
+  - seqkit:
+      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
+      homepage: https://bioinf.shenwei.me/seqkit/usage/
+      documentation: https://bioinf.shenwei.me/seqkit/usage/
+      tool_dev_url: https://github.com/shenwei356/seqkit/
+      doi: "10.1371/journal.pone.0163962"
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input paired-end FastQ files.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Paired fastq reads
+      pattern: "*.paired.fastq.gz"
+  - unpaired_reads:
+      type: file
+      description: Unpaired reads (optional)
+      pattern: "*.unpaired.fastq.gz"
+
+authors:
+  - "@sateeshperi"
+  - "@mjcipriano"
+  - "@hseabolt"
--- a/modules/seqtk/seq/main.nf
+++ b/modules/seqtk/seq/main.nf
@ -0,0 +1,40 @@
+process SEQTK_SEQ {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::seqtk=1.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' :
+        'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }"
+
+    input:
+    tuple val(meta), path(fastx)
+
+    output:
+    tuple val(meta), path("*.gz")     , emit: fastx
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    def extension = "fastq"
+    if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) {
+        extension = "fasta"
+    }
+    """
+    seqtk \\
+        seq \\
+        $args \\
+        $fastx | \\
+        gzip -c > ${prefix}.seqtk-seq.${extension}.gz
+
+    cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/seqtk/seq/meta.yml
+++ b/modules/seqtk/seq/meta.yml
@ -0,0 +1,42 @@
+name: seqtk_seq
+description: Common transformation operations on FASTA or FASTQ files.
+keywords:
+  - seq
+tools:
+  - seqtk:
+      description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk seq command enables common transformation operations on FASTA or FASTQ files.
+      homepage: https://github.com/lh3/seqtk
+      documentation: https://docs.csc.fi/apps/seqtk/
+      tool_dev_url: https://github.com/lh3/seqtk
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - sequences:
+      type: file
+      description: A FASTQ or FASTA file
+      pattern: "*.{fastq.gz, fastq, fq, fq.gz, fasta, fastq.gz, fa, fa.gz, fas, fas.gz, fna, fna.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - sequences:
+      type: file
+      description: FASTQ/FASTA file containing renamed sequences
+      pattern: "*.{fastq.gz, fasta.gz}"
+
+authors:
+  - "@hseabolt"
+  - "@mjcipriano"
+  - "@sateeshperi"
--- a/modules/svdb/merge/main.nf
+++ b/modules/svdb/merge/main.nf
@ -21,15 +21,19 @@ process SVDB_MERGE {
    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    def input  = ""
-    for (int index = 0; index < vcfs.size(); index++) {
-        input += " ${vcfs[index]}:${priority[index]}"
+    def input  = "${vcfs.join(" ")}"
+    def prio   = ""
+    if(priority) {
+        prio = "--priority ${priority.join(',')}"
+        for (int index = 0; index < vcfs.size(); index++) {
+            input += " ${vcfs[index]}:${priority[index]}"
+        }
    }
    """
    svdb \\
        --merge \\
        $args \\
-        --priority ${priority.join(',')} \\
+        $prio \\
        --vcf $input \\
        > ${prefix}_sv_merge.vcf

--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -504,6 +504,10 @@ expansionhunter:
  - modules/expansionhunter/**
  - tests/modules/expansionhunter/**

+faqcs:
+  - modules/faqcs/**
+  - tests/modules/faqcs/**
+
 fargene:
  - modules/fargene/**
  - tests/modules/fargene/**
@ -576,6 +580,10 @@ gatk4/calculatecontamination:
  - modules/gatk4/calculatecontamination/**
  - tests/modules/gatk4/calculatecontamination/**

+gatk4/combinegvcfs:
+  - modules/gatk4/combinegvcfs/**
+  - tests/modules/gatk4/combinegvcfs/**
+
 gatk4/createsequencedictionary:
  - modules/gatk4/createsequencedictionary/**
  - tests/modules/gatk4/createsequencedictionary/**
@ -664,6 +672,10 @@ gatk4/samtofastq:
  - modules/gatk4/samtofastq/**
  - tests/modules/gatk4/samtofastq/**

+gatk4/selectvariants:
+  - modules/gatk4/selectvariants/**
+  - tests/modules/gatk4/selectvariants/**
+
 gatk4/splitncigarreads:
  - modules/gatk4/splitncigarreads/**
  - tests/modules/gatk4/splitncigarreads/**
@ -775,6 +787,10 @@ hmmer/hmmalign:
  - modules/hmmer/hmmalign/**
  - tests/modules/hmmer/hmmalign/**

+hmmer/hmmsearch:
+  - modules/hmmer/hmmsearch/**
+  - tests/modules/hmmer/hmmsearch/**
+
 homer/annotatepeaks:
  - modules/homer/annotatepeaks/**
  - tests/modules/homer/annotatepeaks/**
@ -1433,6 +1449,10 @@ seacr/callpeak:
  - modules/seacr/callpeak/**
  - tests/modules/seacr/callpeak/**

+seqkit/pair:
+  - modules/seqkit/pair/**
+  - tests/modules/seqkit/pair/**
+
 seqkit/split2:
  - modules/seqkit/split2/**
  - tests/modules/seqkit/split2/**
@ -1453,6 +1473,10 @@ seqtk/sample:
  - modules/seqtk/sample/**
  - tests/modules/seqtk/sample/**

+seqtk/seq:
+  - modules/seqtk/seq/**
+  - tests/modules/seqtk/seq/**
+
 seqtk/subseq:
  - modules/seqtk/subseq/**
  - tests/modules/seqtk/subseq/**
--- a/tests/modules/faqcs/main.nf
+++ b/tests/modules/faqcs/main.nf
@ -0,0 +1,30 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FAQCS } from '../../../modules/faqcs/main.nf'
+
+
+//
+// Test with single-end data
+//
+workflow test_fastp_single_end {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    FAQCS ( input )
+}
+
+//
+// Test with paired-end data
+//
+workflow test_fastp_paired_end {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    FAQCS ( input )
+}
+
--- a/tests/modules/faqcs/nextflow.config
+++ b/tests/modules/faqcs/nextflow.config
@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
--- a/tests/modules/faqcs/test.yml
+++ b/tests/modules/faqcs/test.yml
@ -0,0 +1,31 @@
+- name: faqcs test_fastp_single_end
+  command: nextflow run tests/modules/faqcs -entry test_fastp_single_end -c tests/config/nextflow.config
+  tags:
+    - faqcs
+  files:
+    - path: output/faqcs/test.fastp.log
+      md5sum: be79dc893f87de1f82faf749cdfb848c
+    - path: output/faqcs/test.stats.txt
+      md5sum: ea20e93706b2e4c676004253baa3cec6
+    - path: output/faqcs/test.trimmed.fastq.gz
+      md5sum: 875863b402f67403dac63ef59b9c9a8a
+    - path: output/faqcs/test_qc_report.pdf
+    - path: output/faqcs/versions.yml
+      md5sum: 2a38d7e7ab5299336e9669c393c9da6c
+
+- name: faqcs test_fastp_paired_end
+  command: nextflow run tests/modules/faqcs -entry test_fastp_paired_end -c tests/config/nextflow.config
+  tags:
+    - faqcs
+  files:
+    - path: output/faqcs/test.fastp.log
+      md5sum: be79dc893f87de1f82faf749cdfb848c
+    - path: output/faqcs/test.stats.txt
+      md5sum: 9a693f8af94ab8c485519d9a523aa622
+    - path: output/faqcs/test_1.trimmed.fastq.gz
+      md5sum: 875863b402f67403dac63ef59b9c9a8a
+    - path: output/faqcs/test_2.trimmed.fastq.gz
+      md5sum: 375aeb74819ca3d72203135ac80df78c
+    - path: output/faqcs/test_qc_report.pdf
+    - path: output/faqcs/versions.yml
+      md5sum: 208d54c0cf6dfc54e719b81b990afac9
--- a/tests/modules/gatk4/combinegvcfs/main.nf
+++ b/tests/modules/gatk4/combinegvcfs/main.nf
@ -0,0 +1,24 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_COMBINEGVCFS } from '../../../../modules/gatk4/combinegvcfs/main.nf'
+
+workflow test_gatk4_combinegvcfs {
+    
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf'], checkIfExists: true) ],
+              [ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_idx'], checkIfExists: true) ]              
+           ]
+
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+
+    fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+
+    fasta_dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)       
+
+    GATK4_COMBINEGVCFS ( input, fasta, fasta_fai, fasta_dict )
+}
+
--- a/tests/modules/gatk4/combinegvcfs/nextflow.config
+++ b/tests/modules/gatk4/combinegvcfs/nextflow.config
@ -0,0 +1,6 @@
+process {
+
+    ext.args = "--tmp-dir ."
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/gatk4/combinegvcfs/test.yml
+++ b/tests/modules/gatk4/combinegvcfs/test.yml
@ -0,0 +1,10 @@
+- name: gatk4 combinegvcfs test_gatk4_combinegvcfs
+  command: nextflow run tests/modules/gatk4/combinegvcfs -entry test_gatk4_combinegvcfs -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/combinegvcfs
+  files:
+    - path: output/gatk4/test.combined.g.vcf.gz
+      contains: ['VCFv4.2']
+    - path: output/gatk4/versions.yml
+      md5sum: 49d9c467f84b6a99a4da3ef161af26bd
--- a/tests/modules/gatk4/selectvariants/main.nf
+++ b/tests/modules/gatk4/selectvariants/main.nf
@ -0,0 +1,29 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_SELECTVARIANTS } from '../../../../modules/gatk4/selectvariants/main.nf'
+
+// Basic parameters with uncompressed VCF input
+workflow test_gatk4_selectvariants_vcf_input {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true)
+    ]
+
+    GATK4_SELECTVARIANTS ( input)
+}
+
+// Basic parameters with compressed VCF input
+workflow test_gatk4_selectvariants_gz_input {
+
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true)
+    ]
+
+    GATK4_SELECTVARIANTS ( input )
+}
--- a/tests/modules/gatk4/selectvariants/nextflow.config
+++ b/tests/modules/gatk4/selectvariants/nextflow.config
@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/gatk4/selectvariants/test.yml
+++ b/tests/modules/gatk4/selectvariants/test.yml
@ -0,0 +1,21 @@
+- name: gatk4 selectvariants test_gatk4_selectvariants_vcf_input
+  command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_vcf_input -c tests/config/nextflow.config
+  tags:
+    - gatk4/selectvariants
+    - gatk4
+  files:
+    - path: output/gatk4/test.selectvariants.vcf.gz
+    - path: output/gatk4/test.selectvariants.vcf.gz.tbi
+    - path: output/gatk4/versions.yml
+      md5sum: a35d78af179f43652274bc7405d5a785
+
+- name: gatk4 selectvariants test_gatk4_selectvariants_gz_input
+  command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_gz_input -c tests/config/nextflow.config
+  tags:
+    - gatk4/selectvariants
+    - gatk4
+  files:
+    - path: output/gatk4/test.selectvariants.vcf.gz
+    - path: output/gatk4/test.selectvariants.vcf.gz.tbi
+    - path: output/gatk4/versions.yml
+      md5sum: c943f3579a369968ca63444eb43fb6e7
--- a/tests/modules/hmmer/hmmalign/main.nf
+++ b/tests/modules/hmmer/hmmalign/main.nf
@ -8,10 +8,10 @@ workflow test_hmmer_hmmalign {

    input = [
        [ id:'test' ], // meta map
-        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/e_coli_k12_16s.fna')      // Change to params.test_data syntax after the data is included in tests/config/test_data.config
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz')      // Change to params.test_data syntax after the data is included in tests/config/test_data.config
    ]

-    hmm   = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/bac.16S_rRNA.hmm')
+    hmm   = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz')

    HMMER_HMMALIGN ( input, hmm )
 }
--- a/tests/modules/hmmer/hmmsearch/main.nf
+++ b/tests/modules/hmmer/hmmsearch/main.nf
@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HMMER_HMMSEARCH } from '../../../../modules/hmmer/hmmsearch/main.nf'
+
+workflow test_hmmer_hmmsearch {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
+        false,
+        false,
+        false
+    ]
+
+    HMMER_HMMSEARCH ( input )
+}
+
+workflow test_hmmer_hmmsearch_optional {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
+        true,
+        true,
+        true
+    ]
+
+    HMMER_HMMSEARCH ( input )
+}
--- a/tests/modules/hmmer/hmmsearch/nextflow.config
+++ b/tests/modules/hmmer/hmmsearch/nextflow.config
@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/hmmer/hmmsearch/test.yml
+++ b/tests/modules/hmmer/hmmsearch/test.yml
@ -0,0 +1,31 @@
+- name: hmmer hmmsearch test_hmmer_hmmsearch
+  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch -c tests/config/nextflow.config
+  tags:
+    - hmmer/hmmsearch
+    - hmmer
+  files:
+    - path: output/hmmer/test.txt.gz
+      contains:
+        - '[ok]'
+    - path: output/hmmer/versions.yml
+      md5sum: ed0808c10abd205c6bd0fb01f45259bb
+
+- name: hmmer hmmsearch test_hmmer_hmmsearch_optional
+  command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch_optional -c tests/config/nextflow.config
+  tags:
+    - hmmer/hmmsearch
+    - hmmer
+  files:
+    - path: output/hmmer/test.sto.gz
+      md5sum: d3121aa33455074c566fb7f8fdcda7b0
+    - path: output/hmmer/test.domtbl.gz
+      contains:
+        - '# [ok]'
+    - path: output/hmmer/test.tbl.gz
+      contains:
+        - '# [ok]'
+    - path: output/hmmer/test.txt.gz
+      contains:
+        - '[ok]'
+    - path: output/hmmer/versions.yml
+      md5sum: ebdcb08ae540e840f7b5c4c75a3a2993
--- a/tests/modules/seqkit/pair/main.nf
+++ b/tests/modules/seqkit/pair/main.nf
@ -0,0 +1,16 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQKIT_PAIR } from '../../../../modules/seqkit/pair/main.nf'
+
+workflow test_seqkit_pair {
+    
+    input = [ 
+        [ id:'test', single_end:false ], // meta map
+        [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+          file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+    ]
+
+    SEQKIT_PAIR ( input )
+}
--- a/tests/modules/seqkit/pair/nextflow.config
+++ b/tests/modules/seqkit/pair/nextflow.config
@ -0,0 +1,6 @@
+process {
+
+    ext.args = "-u"
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/seqkit/pair/test.yml
+++ b/tests/modules/seqkit/pair/test.yml
@ -0,0 +1,12 @@
+- name: seqkit pair test_seqkit_pair
+  command: nextflow run tests/modules/seqkit/pair -entry test_seqkit_pair -c tests/config/nextflow.config
+  tags:
+    - seqkit/pair
+    - seqkit
+  files:
+    - path: output/seqkit/test_1.paired.fastq.gz
+      md5sum: fbfe7e8bdbc29abaaf58b6f1a32448e5
+    - path: output/seqkit/test_2.paired.fastq.gz
+      md5sum: 7d3c0912e5adc2674e8ecc1e647381b3
+    - path: output/seqkit/versions.yml
+      md5sum: 3086293bc986fc2ece38b1951d090819
--- a/tests/modules/seqtk/seq/main.nf
+++ b/tests/modules/seqtk/seq/main.nf
@ -0,0 +1,19 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQTK_SEQ } from '../../../../modules/seqtk/seq/main.nf'
+
+workflow test_seqtk_seq {
+    input = [ [ id:'test' ],   // meta map
+              [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+            ]
+    SEQTK_SEQ ( input )
+}
+
+workflow test_seqtk_seq_fq {
+    input = [ [ id:'test' ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+    SEQTK_SEQ ( input )
+}
--- a/tests/modules/seqtk/seq/nextflow.config
+++ b/tests/modules/seqtk/seq/nextflow.config
@ -0,0 +1,7 @@
+process {
+    // Testing ext.args for passing arguments into seqtk seq
+    withName: 'SEQTK_SEQ' {
+        ext.args = '-A'
+        publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    }
+}
--- a/tests/modules/seqtk/seq/test.yml
+++ b/tests/modules/seqtk/seq/test.yml
@ -0,0 +1,21 @@
+- name: seqtk seq test_seqtk_seq
+  command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq -c tests/config/nextflow.config
+  tags:
+    - seqtk/seq
+    - seqtk
+  files:
+    - path: output/seqtk/test.seqtk-seq.fasta.gz
+      md5sum: 50d73992c8c7e56dc095ef47ec52a754
+    - path: output/seqtk/versions.yml
+      md5sum: 2b89cd4a6e28f35fcfbbd2188384f944
+
+- name: seqtk seq test_seqtk_seq_fq
+  command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq_fq -c tests/config/nextflow.config
+  tags:
+    - seqtk/seq
+    - seqtk
+  files:
+    - path: output/seqtk/test.seqtk-seq.fasta.gz
+      md5sum: 2f009f1647971a97b4edec726a99dc1a
+    - path: output/seqtk/versions.yml
+      md5sum: 3467a76d3540bee8f58de050512bddaa
--- a/tests/modules/svdb/merge/main.nf
+++ b/tests/modules/svdb/merge/main.nf
@ -14,3 +14,13 @@ workflow test_svdb_merge {

    SVDB_MERGE ( input, priority )
 }
+
+workflow test_svdb_merge_noprio {
+
+    input = [ [ id:'test' ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ]
+            ]
+
+    SVDB_MERGE ( input, [] )
+}
--- a/tests/modules/svdb/merge/test.yml
+++ b/tests/modules/svdb/merge/test.yml
@ -5,3 +5,10 @@
    - svdb/merge
  files:
    - path: output/svdb/test_sv_merge.vcf
+- name: svdb merge noprio
+  command: nextflow run ./tests/modules/svdb/merge -entry test_svdb_merge_noprio -c ./tests/config/nextflow.config -c ./tests/modules/svdb/merge/nextflow.config
+  tags:
+    - svdb
+    - svdb/merge
+  files:
+    - path: output/svdb/test_sv_merge.vcf