diff --git a/modules/faqcs/main.nf b/modules/faqcs/main.nf
new file mode 100644
index 00000000..a03a0150
--- /dev/null
+++ b/modules/faqcs/main.nf
@@ -0,0 +1,108 @@
+// Adapter/quality trimming of single- or paired-end FASTQ reads with FaQCs.
+// FaQCs output files are renamed to <prefix>[_1|_2].trimmed[.unpaired|.discard].fastq
+// and gzipped so that they match the output globs declared below.
+process FAQCS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::faqcs=2.10" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/faqcs%3A2.10--r41h9a82719_2' :
+        'quay.io/biocontainers/faqcs:2.10--r41h9a82719_2' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path('*.trimmed.fastq.gz')           , emit: reads
+    tuple val(meta), path('*.stats.txt')                  , emit: stats
+    tuple val(meta), path('*_qc_report.pdf')              , optional:true, emit: statspdf
+    tuple val(meta), path('*.log')                        , emit: log
+    tuple val(meta), path('*.discard.fastq.gz')           , optional:true, emit: reads_fail
+    tuple val(meta), path('*.trimmed.unpaired.fastq.gz')  , optional:true, emit: reads_unpaired
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+        FaQCs \\
+            -d . \\
+            -u ${prefix}.fastq.gz \\
+            --prefix ${prefix} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.fastq
+            gzip ${prefix}.trimmed.fastq
+        fi
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    } else {
+        // FaQCs must be given the same prefix that the rename steps below look for;
+        // otherwise a custom task.ext.prefix leaves the trimmed reads unrenamed and uncompressed.
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+        FaQCs \\
+            -d . \\
+            -1 ${prefix}_1.fastq.gz \\
+            -2 ${prefix}_2.fastq.gz \\
+            --prefix ${prefix} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        # Unpaired
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            # If it is empty remove it
+            if [[ ! -s ${prefix}.unpaired.trimmed.fastq ]]; then
+                rm ${prefix}.unpaired.trimmed.fastq
+            else
+                mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.unpaired.fastq
+                gzip ${prefix}.trimmed.unpaired.fastq
+            fi
+        fi
+
+        # R1
+        if [[ -f ${prefix}.1.trimmed.fastq ]]; then
+            mv ${prefix}.1.trimmed.fastq ${prefix}_1.trimmed.fastq
+            gzip ${prefix}_1.trimmed.fastq
+        fi
+
+        # R2
+        if [[ -f ${prefix}.2.trimmed.fastq ]]; then
+            mv ${prefix}.2.trimmed.fastq ${prefix}_2.trimmed.fastq
+            gzip ${prefix}_2.trimmed.fastq
+        fi
+
+        # Discarded: Created if --discard argument is passed
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    }
+}
+
diff --git a/modules/faqcs/meta.yml b/modules/faqcs/meta.yml
new file mode 100644
index 00000000..eca35e65
--- /dev/null
+++ b/modules/faqcs/meta.yml
@@ -0,0 +1,68 @@
+name: faqcs
+description: Perform adapter and quality trimming on sequencing reads with reporting
+keywords:
+  - trimming
+  - quality control
+  - fastq
+  - faqcs
+tools:
+  - faqcs:
+      description: |
+        FaQCs combines several features of currently available applications into a single, user-friendly process, and includes additional unique capabilities such as filtering the PhiX control sequences, conversion of FASTQ formats, and multi-threading.
The original data and trimmed summaries are reported within a variety of graphics and reports, providing a simple way to do data quality control and assurance.
+      homepage: https://github.com/LANL-Bioinformatics/FaQCs
+      documentation: https://github.com/LANL-Bioinformatics/FaQCs
+      tool_dev_url: https://github.com/LANL-Bioinformatics/FaQCs
+      doi: "https://doi.org/10.1186/s12859-014-0366-2"
+      licence: ['GPLv3 License']
+
+## TODO nf-core: Add a description of all of the variables used as input
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: The trimmed/modified fastq reads
+      pattern: "*trimmed.fastq.gz"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing (Optional with --discard args setting)
+      pattern: "*discard.fastq.gz"
+  - reads_unpaired:
+      type: file
+      description: Reads without matching mates in paired-end files (Optional)
+      pattern: "*trimmed.unpaired.fastq.gz"
+  - stats:
+      type: file
+      description: trimming/qc text stats file
+      pattern: "*.stats.txt"
+  - statspdf:
+      type: file
+      description: trimming/qc pdf report file
+      pattern: "*_qc_report.pdf"
+  - log:
+      type: file
+      description: fastq log file
+      pattern: "*.log"
+authors:
+  - "@mjcipriano"
+  - "@sateeshperi"
+  - "@hseabolt"
diff --git a/modules/gatk4/combinegvcfs/main.nf b/modules/gatk4/combinegvcfs/main.nf
new file mode 100644
index 00000000..c0a7ac45
--- /dev/null
+++ b/modules/gatk4/combinegvcfs/main.nf
@@ -0,0 +1,52 @@
+// Combine per-sample gVCFs into a single multi-sample gVCF with GATK4 CombineGVCFs.
+process GATK4_COMBINEGVCFS {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0' :
+        'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(vcf_idx)
+    path (fasta)
+    path (fasta_fai)
+    path (fasta_dict)
+
+    output:
+    tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Java heap size in GB: the task's memory directive when set, otherwise 3.
+    def heap_gb = 3
+    if (task.memory) {
+        heap_gb = task.memory.giga
+    } else {
+        log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    }
+
+    // Build one '-V <file>' argument per input VCF.
+    def variant_args = vcf.collect { gvcf -> "-V ${gvcf}" }.join(' ')
+    """
+    gatk \\
+        --java-options "-Xmx${heap_gb}g" \\
+        CombineGVCFs \\
+        -R ${fasta} \\
+        -O ${prefix}.combined.g.vcf.gz \\
+        ${args} \\
+        ${variant_args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/gatk4/combinegvcfs/meta.yml b/modules/gatk4/combinegvcfs/meta.yml
new file mode 100644
index 00000000..2e0198dd
--- /dev/null
+++ b/modules/gatk4/combinegvcfs/meta.yml
@@ -0,0 +1,54 @@
+name: gatk4_combinegvcfs
+description: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file
+keywords:
+  - gvcf
+  - gatk4
+  - vcf
+  - combinegvcfs
+  - Short_Variant_Discovery
+tools:
+  - gatk4:
+      description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping.
Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs
+      tool_dev_url: https://github.com/broadinstitute/gatk
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ['Apache-2.0']
+
+input:
+  - fasta:
+      type: file
+      description: The reference fasta file
+      pattern: "*.fasta"
+  - fasta_fai:
+      type: file
+      description: FASTA index file
+      pattern: "*.{fai}"
+  - fasta_dict:
+      type: file
+      description: FASTA dictionary file
+      pattern: "*.{dict}"
+  - vcf:
+      type: file
+      description: Compressed VCF files
+      pattern: "*.vcf.gz"
+  - vcf_idx:
+      type: file
+      description: VCF Index file
+      pattern: "*.{idx,tbi}"
+output:
+  - combined_gvcf:
+      type: file
+      description: Compressed Combined GVCF file
+      pattern: "*.combined.g.vcf.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@sateeshperi"
+  - "@mjcipriano"
+  - "@hseabolt"
diff --git a/modules/gatk4/selectvariants/main.nf b/modules/gatk4/selectvariants/main.nf
new file mode 100644
index 00000000..fd750a9b
--- /dev/null
+++ b/modules/gatk4/selectvariants/main.nf
@@ -0,0 +1,43 @@
+// Subset variants from a VCF with GATK4 SelectVariants; any selection options are passed via task.ext.args.
+process GATK4_SELECTVARIANTS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0':
+        'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(vcf_idx)
+
+    output:
+    tuple val(meta), path("*.selectvariants.vcf.gz")       , emit: vcf
+    tuple val(meta), path("*.selectvariants.vcf.gz.tbi")   , emit: tbi
+    path "versions.yml"                                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Java heap size in GB: the task's memory directive when set, otherwise 3.
+    def avail_mem = 3
+    if (!task.memory) {
+        log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.toGiga()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
+        -V $vcf \\
+        -O ${prefix}.selectvariants.vcf.gz \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/gatk4/selectvariants/meta.yml b/modules/gatk4/selectvariants/meta.yml
new file mode 100644
index 00000000..381af249
--- /dev/null
+++ b/modules/gatk4/selectvariants/meta.yml
@@ -0,0 +1,55 @@
+name: gatk4_selectvariants
+description: Select a subset of variants from a VCF file
+keywords:
+  - gatk
+  - gatk4
+  - selectvariants
+  - vcf
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: VCF(.gz) file + pattern: "*.{vcf,vcf.gz}" + - vcf_idx: + type: list + description: VCF file index + pattern: "*.{idx,tbi}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.selectvariants.vcf.gz" + - vcf_tbi: + type: list + description: VCF file index + pattern: "*.{idx,tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@mjcipriano" diff --git a/modules/hmmer/hmmalign/main.nf b/modules/hmmer/hmmalign/main.nf index d3df3c89..f8156e04 100644 --- a/modules/hmmer/hmmalign/main.nf +++ b/modules/hmmer/hmmalign/main.nf @@ -21,13 +21,11 @@ process HMMER_HMMALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def fastacmd = fasta.getExtension() == 'gz' ? "gunzip -c $fasta" : "cat $fasta" """ - $fastacmd | \\ - hmmalign \\ + hmmalign \\ $args \\ $hmm \\ - - | gzip -c > ${meta.id}.sthlm.gz + $fasta | gzip -c > ${meta.id}.sthlm.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/hmmer/hmmalign/meta.yml b/modules/hmmer/hmmalign/meta.yml index 296826d4..6c501120 100644 --- a/modules/hmmer/hmmalign/meta.yml +++ b/modules/hmmer/hmmalign/meta.yml @@ -19,12 +19,12 @@ input: e.g. 
[ id:'test' ]
   - fasta:
       type: file
-      description: Amino acid or nucleotide fasta file, gzipped or not
-      pattern: "*.{fna,fna.gz,faa,faa.gz,fasta,fasta.gz,fa,fa.gz}"
+      description: Amino acid or nucleotide gzipped compressed fasta file
+      pattern: "*.{fna.gz,faa.gz,fasta.gz,fa.gz}"
   - hmm:
       type: file
-      description: HMM file
-      pattern: "*.hmm"
+      description: A gzipped HMM file
+      pattern: "*.hmm.gz"
 
 output:
   - meta:
@@ -43,3 +43,4 @@ output:
 
 authors:
   - "@erikrikarddaniel"
+  - "@jfy133"
diff --git a/modules/hmmer/hmmsearch/main.nf b/modules/hmmer/hmmsearch/main.nf
new file mode 100644
index 00000000..3a2d15b7
--- /dev/null
+++ b/modules/hmmer/hmmsearch/main.nf
@@ -0,0 +1,54 @@
+// Search HMM profile(s) against a sequence database with hmmsearch.
+// The write_* input flags switch on the optional alignment / per-target / per-domain outputs.
+process HMMER_HMMSEARCH {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' :
+        'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"
+
+    input:
+    tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain)
+
+    output:
+    tuple val(meta), path('*.txt.gz')   , emit: output
+    tuple val(meta), path('*.sto.gz')   , emit: alignments    , optional: true
+    tuple val(meta), path('*.tbl.gz')   , emit: target_summary, optional: true
+    tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Declared with 'def' so these stay local to the script block, like args and prefix above.
+    def output         = "${prefix}.txt"
+    def alignment      = write_align  ? "-A ${prefix}.sto" : ''
+    def target_summary = write_target ? "--tblout ${prefix}.tbl" : ''
+    def domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : ''
+    """
+    hmmsearch \\
+        $args \\
+        --cpu $task.cpus \\
+        -o $output \\
+        $alignment \\
+        $target_summary \\
+        $domain_summary \\
+        $hmmfile \\
+        $seqdb
+
+    gzip --no-name *.txt \\
+        ${write_align ? '*.sto' : ''} \\
+        ${write_target ? '*.tbl' : ''} \\
+        ${write_domain ? '*.domtbl' : ''}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/hmmer/hmmsearch/meta.yml b/modules/hmmer/hmmsearch/meta.yml
new file mode 100644
index 00000000..b315d668
--- /dev/null
+++ b/modules/hmmer/hmmsearch/meta.yml
@@ -0,0 +1,69 @@
+name: hmmer_hmmsearch
+description: search profile(s) against a sequence database
+keywords:
+  - hidden Markov model
+  - HMM
+  - hmmer
+  - hmmsearch
+tools:
+  - hmmer:
+      description: Biosequence analysis using profile hidden Markov models
+      homepage: http://hmmer.org/
+      documentation: http://hmmer.org/documentation.html
+      tool_dev_url: https://github.com/EddyRivasLab/hmmer
+      doi: "10.1371/journal.pcbi.1002195"
+      licence: ['BSD']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - hmmfile:
+      type: file
+      description: One or more HMM profiles created with hmmbuild
+      pattern: "*.{hmm,hmm.gz}"
+  - seqdb:
+      type: file
+      description: Database of sequences in FASTA format
+      pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}"
+  - write_align:
+      type: val
+      description: Flag to write optional alignment output. Specify with 'true' to output
+  - write_target:
+      type: val
+      description: Flag to write optional per target summary. Specify with 'true' to output
+  - write_domain:
+      type: val
+      description: Flag to write optional per domain summary. Specify with 'true' to output
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - output:
+      type: file
+      description: Human readable output summarizing hmmsearch results
+      pattern: "*.{txt.gz}"
+  - alignments:
+      type: file
+      description: Optional multiple sequence alignment (MSA) in Stockholm format
+      pattern: "*.{sto.gz}"
+  - target_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-target output
+      pattern: "*.{tbl.gz}"
+  - domain_summary:
+      type: file
+      description: Optional tabular (space-delimited) summary of per-domain output
+      pattern: "*.{domtbl.gz}"
+
+authors:
+  - "@Midnighter"
diff --git a/modules/seqkit/pair/main.nf b/modules/seqkit/pair/main.nf
new file mode 100644
index 00000000..228b98bd
--- /dev/null
+++ b/modules/seqkit/pair/main.nf
@@ -0,0 +1,42 @@
+// Match up paired-end reads from two FASTQ files with 'seqkit pair'.
+// Any uncompressed *.fastq left in the work directory afterwards is gzipped.
+process SEQKIT_PAIR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::seqkit=2.1.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0':
+        'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.paired.fastq.gz")  , emit: reads
+    tuple val(meta), path("*.unpaired.fastq.gz"), optional: true, emit: unpaired_reads
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    seqkit \\
+        pair \\
+        -1 ${reads[0]} \\
+        -2 ${reads[1]} \\
+        $args \\
+        --threads $task.cpus
+
+    # gzip fastq
+    find . -maxdepth 1 -name "*.fastq" -exec gzip {} \\;
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqkit: \$( seqkit | sed '3!d; s/Version: //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/seqkit/pair/meta.yml b/modules/seqkit/pair/meta.yml
new file mode 100644
index 00000000..3b35d908
--- /dev/null
+++ b/modules/seqkit/pair/meta.yml
@@ -0,0 +1,48 @@
+name: seqkit_pair
+description: match up paired-end reads from two fastq files
+keywords:
+  - seqkit
+  - pair
+tools:
+  - seqkit:
+      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
+      homepage: https://bioinf.shenwei.me/seqkit/usage/
+      documentation: https://bioinf.shenwei.me/seqkit/usage/
+      tool_dev_url: https://github.com/shenwei356/seqkit/
+      doi: "10.1371/journal.pone.0163962"
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input paired-end FastQ files.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Paired fastq reads
+      pattern: "*.paired.fastq.gz"
+  - unpaired_reads:
+      type: file
+      description: Unpaired reads (optional)
+      pattern: "*.unpaired.fastq.gz"
+
+authors:
+  - "@sateeshperi"
+  - "@mjcipriano"
+  - "@hseabolt"
diff --git a/modules/seqtk/seq/main.nf b/modules/seqtk/seq/main.nf
new file mode 100644
index 00000000..1fb03003
--- /dev/null
+++ b/modules/seqtk/seq/main.nf
@@ -0,0 +1,45 @@
+// Run 'seqtk seq' on a FASTA/FASTQ file and gzip the result.
+process SEQTK_SEQ {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::seqtk=1.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' :
+        'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }"
+
+    input:
+    tuple val(meta), path(fastx)
+
+    output:
+    tuple val(meta), path("*.gz")     , emit: fastx
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // The output is named .fasta when the input has a FASTA extension, or when -a/-A
+    // appears as a separate option anywhere in args (not only when it is the sole argument).
+    def extension = "fastq"
+    def fasta_input  = "$fastx" ==~ /.+\.(fasta|fa|fas|fna)(\.gz)?/
+    def fasta_forced = ("$args" =~ /(?<!\S)-[aA](?!\S)/).find()
+    if (fasta_input || fasta_forced) {
+        extension = "fasta"
+    }
+    """
+    seqtk \\
+        seq \\
+        $args \\
+        $fastx | \\
+        gzip -c > ${prefix}.seqtk-seq.${extension}.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/seqtk/seq/meta.yml b/modules/seqtk/seq/meta.yml
new file mode 100644
index 00000000..ac32162b
--- /dev/null
+++ b/modules/seqtk/seq/meta.yml
@@ -0,0 +1,42 @@
+name: seqtk_seq
+description: Common transformation operations on FASTA or FASTQ files.
+keywords:
+  - seq
+tools:
+  - seqtk:
+      description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk seq command enables common transformation operations on FASTA or FASTQ files.
+      homepage: https://github.com/lh3/seqtk
+      documentation: https://docs.csc.fi/apps/seqtk/
+      tool_dev_url: https://github.com/lh3/seqtk
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - fastx:
+      type: file
+      description: A FASTQ or FASTA file
+      pattern: "*.{fastq.gz, fastq, fq, fq.gz, fasta, fasta.gz, fa, fa.gz, fas, fas.gz, fna, fna.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fastx:
+      type: file
+      description: Gzipped FASTQ/FASTA file containing the transformed sequences
+      pattern: "*.{fastq.gz, fasta.gz}"
+
+authors:
+  - "@hseabolt"
+  - "@mjcipriano"
+  - "@sateeshperi"
diff --git a/modules/svdb/merge/main.nf b/modules/svdb/merge/main.nf
index 1f479ea4..505e2c0b 100644
--- a/modules/svdb/merge/main.nf
+++ b/modules/svdb/merge/main.nf
@@ -21,15 +21,22 @@ process SVDB_MERGE {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def input = ""
-    for (int index = 0; index < vcfs.size(); index++) {
-        input += " ${vcfs[index]}:${priority[index]}"
+    // Without priorities the VCFs are passed as-is. With priorities each VCF is passed
+    // exactly once as <vcf>:<tag>, and the tag order is handed to svdb via --priority.
+    def input = "${vcfs.join(" ")}"
+    def prio = ""
+    if(priority) {
+        prio = "--priority ${priority.join(',')}"
+        input = ""
+        for (int index = 0; index < vcfs.size(); index++) {
+            input += " ${vcfs[index]}:${priority[index]}"
+        }
     }
     """
     svdb \\
         --merge \\
         $args \\
-        --priority ${priority.join(',')} \\
+        $prio \\
         --vcf $input \\
         > ${prefix}_sv_merge.vcf
 
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index a71d282c..d84bff65 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -504,6 +504,10 @@ expansionhunter:
   - modules/expansionhunter/**
   - tests/modules/expansionhunter/**
 
+faqcs:
+  - modules/faqcs/**
+  - tests/modules/faqcs/**
+
 fargene:
   - modules/fargene/**
   - tests/modules/fargene/**
@@ -576,6 +580,10 @@ gatk4/calculatecontamination:
   - modules/gatk4/calculatecontamination/**
   - tests/modules/gatk4/calculatecontamination/**
 
+gatk4/combinegvcfs:
+  - modules/gatk4/combinegvcfs/**
+  - tests/modules/gatk4/combinegvcfs/**
+
 gatk4/createsequencedictionary:
   - modules/gatk4/createsequencedictionary/**
   - tests/modules/gatk4/createsequencedictionary/**
@@ -664,6 +672,10 @@ gatk4/samtofastq:
   - modules/gatk4/samtofastq/**
   - tests/modules/gatk4/samtofastq/**
 
+gatk4/selectvariants:
+  - modules/gatk4/selectvariants/**
+  -
tests/modules/gatk4/selectvariants/** + gatk4/splitncigarreads: - modules/gatk4/splitncigarreads/** - tests/modules/gatk4/splitncigarreads/** @@ -775,6 +787,10 @@ hmmer/hmmalign: - modules/hmmer/hmmalign/** - tests/modules/hmmer/hmmalign/** +hmmer/hmmsearch: + - modules/hmmer/hmmsearch/** + - tests/modules/hmmer/hmmsearch/** + homer/annotatepeaks: - modules/homer/annotatepeaks/** - tests/modules/homer/annotatepeaks/** @@ -1433,6 +1449,10 @@ seacr/callpeak: - modules/seacr/callpeak/** - tests/modules/seacr/callpeak/** +seqkit/pair: + - modules/seqkit/pair/** + - tests/modules/seqkit/pair/** + seqkit/split2: - modules/seqkit/split2/** - tests/modules/seqkit/split2/** @@ -1453,6 +1473,10 @@ seqtk/sample: - modules/seqtk/sample/** - tests/modules/seqtk/sample/** +seqtk/seq: + - modules/seqtk/seq/** + - tests/modules/seqtk/seq/** + seqtk/subseq: - modules/seqtk/subseq/** - tests/modules/seqtk/subseq/** diff --git a/tests/modules/faqcs/main.nf b/tests/modules/faqcs/main.nf new file mode 100644 index 00000000..eba4bb97 --- /dev/null +++ b/tests/modules/faqcs/main.nf @@ -0,0 +1,30 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FAQCS } from '../../../modules/faqcs/main.nf' + + +// +// Test with single-end data +// +workflow test_fastp_single_end { + input = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + FAQCS ( input ) +} + +// +// Test with paired-end data +// +workflow test_fastp_paired_end { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + FAQCS ( input ) +} + diff --git a/tests/modules/faqcs/nextflow.config b/tests/modules/faqcs/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/faqcs/nextflow.config @@ -0,0 
+1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/faqcs/test.yml b/tests/modules/faqcs/test.yml new file mode 100644 index 00000000..47f973f3 --- /dev/null +++ b/tests/modules/faqcs/test.yml @@ -0,0 +1,31 @@ +- name: faqcs test_fastp_single_end + command: nextflow run tests/modules/faqcs -entry test_fastp_single_end -c tests/config/nextflow.config + tags: + - faqcs + files: + - path: output/faqcs/test.fastp.log + md5sum: be79dc893f87de1f82faf749cdfb848c + - path: output/faqcs/test.stats.txt + md5sum: ea20e93706b2e4c676004253baa3cec6 + - path: output/faqcs/test.trimmed.fastq.gz + md5sum: 875863b402f67403dac63ef59b9c9a8a + - path: output/faqcs/test_qc_report.pdf + - path: output/faqcs/versions.yml + md5sum: 2a38d7e7ab5299336e9669c393c9da6c + +- name: faqcs test_fastp_paired_end + command: nextflow run tests/modules/faqcs -entry test_fastp_paired_end -c tests/config/nextflow.config + tags: + - faqcs + files: + - path: output/faqcs/test.fastp.log + md5sum: be79dc893f87de1f82faf749cdfb848c + - path: output/faqcs/test.stats.txt + md5sum: 9a693f8af94ab8c485519d9a523aa622 + - path: output/faqcs/test_1.trimmed.fastq.gz + md5sum: 875863b402f67403dac63ef59b9c9a8a + - path: output/faqcs/test_2.trimmed.fastq.gz + md5sum: 375aeb74819ca3d72203135ac80df78c + - path: output/faqcs/test_qc_report.pdf + - path: output/faqcs/versions.yml + md5sum: 208d54c0cf6dfc54e719b81b990afac9 diff --git a/tests/modules/gatk4/combinegvcfs/main.nf b/tests/modules/gatk4/combinegvcfs/main.nf new file mode 100644 index 00000000..50d42625 --- /dev/null +++ b/tests/modules/gatk4/combinegvcfs/main.nf @@ -0,0 +1,24 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_COMBINEGVCFS } from '../../../../modules/gatk4/combinegvcfs/main.nf' + +workflow test_gatk4_combinegvcfs { + + input = [ [ id:'test', single_end:false ], // meta map + [ 
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_idx'], checkIfExists: true) ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + fasta_dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_COMBINEGVCFS ( input, fasta, fasta_fai, fasta_dict ) +} + diff --git a/tests/modules/gatk4/combinegvcfs/nextflow.config b/tests/modules/gatk4/combinegvcfs/nextflow.config new file mode 100644 index 00000000..573cc13e --- /dev/null +++ b/tests/modules/gatk4/combinegvcfs/nextflow.config @@ -0,0 +1,6 @@ +process { + + ext.args = "--tmp-dir ." 
+ publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/combinegvcfs/test.yml b/tests/modules/gatk4/combinegvcfs/test.yml new file mode 100644 index 00000000..72d8a6a5 --- /dev/null +++ b/tests/modules/gatk4/combinegvcfs/test.yml @@ -0,0 +1,10 @@ +- name: gatk4 combinegvcfs test_gatk4_combinegvcfs + command: nextflow run tests/modules/gatk4/combinegvcfs -entry test_gatk4_combinegvcfs -c tests/config/nextflow.config + tags: + - gatk4 + - gatk4/combinegvcfs + files: + - path: output/gatk4/test.combined.g.vcf.gz + contains: ['VCFv4.2'] + - path: output/gatk4/versions.yml + md5sum: 49d9c467f84b6a99a4da3ef161af26bd diff --git a/tests/modules/gatk4/selectvariants/main.nf b/tests/modules/gatk4/selectvariants/main.nf new file mode 100644 index 00000000..7005dcd3 --- /dev/null +++ b/tests/modules/gatk4/selectvariants/main.nf @@ -0,0 +1,29 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_SELECTVARIANTS } from '../../../../modules/gatk4/selectvariants/main.nf' + +// Basic parameters with uncompressed VCF input +workflow test_gatk4_selectvariants_vcf_input { + + input = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) + ] + + GATK4_SELECTVARIANTS ( input) +} + +// Basic parameters with compressed VCF input +workflow test_gatk4_selectvariants_gz_input { + + input = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) + ] + + GATK4_SELECTVARIANTS ( input ) +} diff --git a/tests/modules/gatk4/selectvariants/nextflow.config b/tests/modules/gatk4/selectvariants/nextflow.config new file mode 100644 index 
00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/selectvariants/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/selectvariants/test.yml b/tests/modules/gatk4/selectvariants/test.yml new file mode 100644 index 00000000..5bc32330 --- /dev/null +++ b/tests/modules/gatk4/selectvariants/test.yml @@ -0,0 +1,21 @@ +- name: gatk4 selectvariants test_gatk4_selectvariants_vcf_input + command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_vcf_input -c tests/config/nextflow.config + tags: + - gatk4/selectvariants + - gatk4 + files: + - path: output/gatk4/test.selectvariants.vcf.gz + - path: output/gatk4/test.selectvariants.vcf.gz.tbi + - path: output/gatk4/versions.yml + md5sum: a35d78af179f43652274bc7405d5a785 + +- name: gatk4 selectvariants test_gatk4_selectvariants_gz_input + command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_gz_input -c tests/config/nextflow.config + tags: + - gatk4/selectvariants + - gatk4 + files: + - path: output/gatk4/test.selectvariants.vcf.gz + - path: output/gatk4/test.selectvariants.vcf.gz.tbi + - path: output/gatk4/versions.yml + md5sum: c943f3579a369968ca63444eb43fb6e7 diff --git a/tests/modules/hmmer/hmmalign/main.nf b/tests/modules/hmmer/hmmalign/main.nf index 8758b124..0a462a7a 100644 --- a/tests/modules/hmmer/hmmalign/main.nf +++ b/tests/modules/hmmer/hmmalign/main.nf @@ -8,10 +8,10 @@ workflow test_hmmer_hmmalign { input = [ [ id:'test' ], // meta map - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/e_coli_k12_16s.fna') // Change to params.test_data syntax after the data is included in tests/config/test_data.config + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz') // Change to params.test_data 
syntax after the data is included in tests/config/test_data.config ] - hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/bac.16S_rRNA.hmm') + hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz') HMMER_HMMALIGN ( input, hmm ) } diff --git a/tests/modules/hmmer/hmmsearch/main.nf b/tests/modules/hmmer/hmmsearch/main.nf new file mode 100644 index 00000000..b181a529 --- /dev/null +++ b/tests/modules/hmmer/hmmsearch/main.nf @@ -0,0 +1,33 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { HMMER_HMMSEARCH } from '../../../../modules/hmmer/hmmsearch/main.nf' + +workflow test_hmmer_hmmsearch { + + input = [ + [ id:'test', single_end:false ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + ] + + HMMER_HMMSEARCH ( input ) +} + +workflow test_hmmer_hmmsearch_optional { + + input = [ + [ id:'test', single_end:false ], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + + HMMER_HMMSEARCH ( input ) +} diff --git a/tests/modules/hmmer/hmmsearch/nextflow.config b/tests/modules/hmmer/hmmsearch/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/hmmer/hmmsearch/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/hmmer/hmmsearch/test.yml 
b/tests/modules/hmmer/hmmsearch/test.yml new file mode 100644 index 00000000..91e4775c --- /dev/null +++ b/tests/modules/hmmer/hmmsearch/test.yml @@ -0,0 +1,31 @@ +- name: hmmer hmmsearch test_hmmer_hmmsearch + command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch -c tests/config/nextflow.config + tags: + - hmmer/hmmsearch + - hmmer + files: + - path: output/hmmer/test.txt.gz + contains: + - '[ok]' + - path: output/hmmer/versions.yml + md5sum: ed0808c10abd205c6bd0fb01f45259bb + +- name: hmmer hmmsearch test_hmmer_hmmsearch_optional + command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch_optional -c tests/config/nextflow.config + tags: + - hmmer/hmmsearch + - hmmer + files: + - path: output/hmmer/test.sto.gz + md5sum: d3121aa33455074c566fb7f8fdcda7b0 + - path: output/hmmer/test.domtbl.gz + contains: + - '# [ok]' + - path: output/hmmer/test.tbl.gz + contains: + - '# [ok]' + - path: output/hmmer/test.txt.gz + contains: + - '[ok]' + - path: output/hmmer/versions.yml + md5sum: ebdcb08ae540e840f7b5c4c75a3a2993 diff --git a/tests/modules/seqkit/pair/main.nf b/tests/modules/seqkit/pair/main.nf new file mode 100644 index 00000000..42bc9587 --- /dev/null +++ b/tests/modules/seqkit/pair/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SEQKIT_PAIR } from '../../../../modules/seqkit/pair/main.nf' + +workflow test_seqkit_pair { + + input = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + SEQKIT_PAIR ( input ) +} diff --git a/tests/modules/seqkit/pair/nextflow.config b/tests/modules/seqkit/pair/nextflow.config new file mode 100644 index 00000000..49de9240 --- /dev/null +++ b/tests/modules/seqkit/pair/nextflow.config @@ -0,0 +1,6 @@ +process { + + ext.args = "-u" + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/seqkit/pair/test.yml b/tests/modules/seqkit/pair/test.yml new file mode 100644 index 00000000..30373d69 --- /dev/null +++ b/tests/modules/seqkit/pair/test.yml @@ -0,0 +1,12 @@ +- name: seqkit pair test_seqkit_pair + command: nextflow run tests/modules/seqkit/pair -entry test_seqkit_pair -c tests/config/nextflow.config + tags: + - seqkit/pair + - seqkit + files: + - path: output/seqkit/test_1.paired.fastq.gz + md5sum: fbfe7e8bdbc29abaaf58b6f1a32448e5 + - path: output/seqkit/test_2.paired.fastq.gz + md5sum: 7d3c0912e5adc2674e8ecc1e647381b3 + - path: output/seqkit/versions.yml + md5sum: 3086293bc986fc2ece38b1951d090819 diff --git a/tests/modules/seqtk/seq/main.nf b/tests/modules/seqtk/seq/main.nf new file mode 100644 index 00000000..4351b8a9 --- /dev/null +++ b/tests/modules/seqtk/seq/main.nf @@ -0,0 +1,19 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SEQTK_SEQ } from '../../../../modules/seqtk/seq/main.nf' + +workflow test_seqtk_seq { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + ] + SEQTK_SEQ ( input ) +} + +workflow test_seqtk_seq_fq { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + SEQTK_SEQ ( input ) +} \ No newline at end of file diff --git a/tests/modules/seqtk/seq/nextflow.config b/tests/modules/seqtk/seq/nextflow.config new file mode 100644 index 00000000..174bccd9 --- /dev/null +++ b/tests/modules/seqtk/seq/nextflow.config @@ -0,0 +1,7 @@ +process { + // Testing ext.args for passing arguments into seqtk seq + withName: 'SEQTK_SEQ' { + ext.args = '-A' + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + } +} \ No newline at end of file diff --git 
a/tests/modules/seqtk/seq/test.yml b/tests/modules/seqtk/seq/test.yml new file mode 100644 index 00000000..3e4532c6 --- /dev/null +++ b/tests/modules/seqtk/seq/test.yml @@ -0,0 +1,21 @@ +- name: seqtk seq test_seqtk_seq + command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq -c tests/config/nextflow.config + tags: + - seqtk/seq + - seqtk + files: + - path: output/seqtk/test.seqtk-seq.fasta.gz + md5sum: 50d73992c8c7e56dc095ef47ec52a754 + - path: output/seqtk/versions.yml + md5sum: 2b89cd4a6e28f35fcfbbd2188384f944 + +- name: seqtk seq test_seqtk_seq_fq + command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq_fq -c tests/config/nextflow.config + tags: + - seqtk/seq + - seqtk + files: + - path: output/seqtk/test.seqtk-seq.fasta.gz + md5sum: 2f009f1647971a97b4edec726a99dc1a + - path: output/seqtk/versions.yml + md5sum: 3467a76d3540bee8f58de050512bddaa diff --git a/tests/modules/svdb/merge/main.nf b/tests/modules/svdb/merge/main.nf index f417c3f7..3cbb4b44 100644 --- a/tests/modules/svdb/merge/main.nf +++ b/tests/modules/svdb/merge/main.nf @@ -14,3 +14,13 @@ workflow test_svdb_merge { SVDB_MERGE ( input, priority ) } + +workflow test_svdb_merge_noprio { + + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ] + ] + + SVDB_MERGE ( input, [] ) +} diff --git a/tests/modules/svdb/merge/test.yml b/tests/modules/svdb/merge/test.yml index 8d16562f..46c4fef0 100644 --- a/tests/modules/svdb/merge/test.yml +++ b/tests/modules/svdb/merge/test.yml @@ -5,3 +5,10 @@ - svdb/merge files: - path: output/svdb/test_sv_merge.vcf +- name: svdb merge noprio + command: nextflow run ./tests/modules/svdb/merge -entry test_svdb_merge_noprio -c ./tests/config/nextflow.config -c ./tests/modules/svdb/merge/nextflow.config + tags: + - svdb + - svdb/merge + files: + - path: output/svdb/test_sv_merge.vcf