Merge branch 'nf-core:master' into master

James A. Fellows Yates 2022-02-27 14:20:41 +01:00 committed by GitHub
commit 8d16261543
40 changed files with 1039 additions and 29 deletions

modules/faqcs/main.nf

@@ -0,0 +1,103 @@
process FAQCS {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::faqcs=2.10" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/faqcs%3A2.10--r41h9a82719_2' :
'quay.io/biocontainers/faqcs:2.10--r41h9a82719_2' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path('*.trimmed.fastq.gz') , emit: reads
tuple val(meta), path('*.stats.txt') , emit: stats
tuple val(meta), path('*_qc_report.pdf') , optional:true, emit: statspdf
tuple val(meta), path('*.log') , emit: log
tuple val(meta), path('*.discard.fastq.gz') , optional:true, emit: reads_fail
tuple val(meta), path('*.trimmed.unpaired.fastq.gz') , optional:true, emit: reads_unpaired
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
// Added soft-links to original fastqs for consistent naming in MultiQC
def prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
[ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
FaQCs \\
-d . \\
-u ${prefix}.fastq.gz \\
--prefix ${prefix} \\
-t $task.cpus \\
$args \\
2> ${prefix}.fastp.log
if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.fastq
gzip ${prefix}.trimmed.fastq
fi
if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
gzip ${prefix}.trimmed.discard.fastq
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
END_VERSIONS
"""
} else {
"""
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
FaQCs \\
-d . \\
-1 ${prefix}_1.fastq.gz \\
-2 ${prefix}_2.fastq.gz \\
--prefix ${meta.id} \\
-t $task.cpus \\
$args \\
2> ${prefix}.fastp.log
# Unpaired
if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
# If it is empty remove it
if [[ ! -s ${prefix}.unpaired.trimmed.fastq ]]; then
rm ${prefix}.unpaired.trimmed.fastq
else
mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.unpaired.fastq
gzip ${prefix}.trimmed.unpaired.fastq
fi
fi
# R1
if [[ -f ${prefix}.1.trimmed.fastq ]]; then
mv ${prefix}.1.trimmed.fastq ${prefix}_1.trimmed.fastq
gzip ${prefix}_1.trimmed.fastq
fi
# R2
if [[ -f ${prefix}.2.trimmed.fastq ]]; then
mv ${prefix}.2.trimmed.fastq ${prefix}_2.trimmed.fastq
gzip ${prefix}_2.trimmed.fastq
fi
# Discarded: Created if --discard argument is passed
if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
gzip ${prefix}.trimmed.discard.fastq
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
END_VERSIONS
"""
}
}
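Extra FaQCs options reach the tool through task.ext.args. A minimal configuration sketch (hypothetical values; '--discard 1' is shown only to illustrate how the optional *.discard.fastq.gz channel gets produced):

process {
    withName: 'FAQCS' {
        // Hypothetical example: flags placed in ext.args are appended to the FaQCs
        // call via $args; --discard makes the module emit the optional discard reads.
        ext.args = '--discard 1'
    }
}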

modules/faqcs/meta.yml

@@ -0,0 +1,68 @@
name: faqcs
description: Perform adapter and quality trimming on sequencing reads with reporting
keywords:
- trimming
- quality control
- fastq
- faqcs
tools:
- faqcs:
description: |
FaQCs combines several features of currently available applications into a single, user-friendly process, and includes additional unique capabilities such as filtering the PhiX control sequences, conversion of FASTQ formats, and multi-threading. The original data and trimmed summaries are reported within a variety of graphics and reports, providing a simple way to do data quality control and assurance.
homepage: https://github.com/LANL-Bioinformatics/FaQCs
documentation: https://github.com/LANL-Bioinformatics/FaQCs
tool_dev_url: https://github.com/LANL-Bioinformatics/FaQCs
doi: "https://doi.org/10.1186/s12859-014-0366-2"
licence: ['GPLv3 License']
## TODO nf-core: Add a description of all of the variables used as input
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: The trimmed/modified fastq reads
pattern: "*trimmed.fastq.gz"
- reads_fail:
type: file
description: Reads that failed the preprocessing (Optional with --discard args setting)
pattern: "*discard.fastq.gz"
- reads_unpaired:
type: file
description: Reads without matching mates in paired-end files (Optional)
pattern: "*trimmed.unpaired.fastq.gz"
- stats:
type: file
description: trimming/qc text stats file
pattern: "*.stats.txt"
- statspdf:
type: file
description: trimming/qc pdf report file
pattern: "*_qc_report.pdf"
- log:
type: file
description: fastq log file
pattern: "*.log"
authors:
- "@mjcipriano"
- "@sateeshperi"
- "@hseabolt"

modules/gatk4/combinegvcfs/main.nf

@@ -0,0 +1,47 @@
process GATK4_COMBINEGVCFS {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0' :
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(vcf_idx)
path (fasta)
path (fasta_fai)
path (fasta_dict)
output:
tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
def input_files = vcf.collect{"-V ${it}"}.join(' ') // add '-V' to each vcf file
"""
gatk \\
--java-options "-Xmx${avail_mem}g" \\
CombineGVCFs \\
-R ${fasta} \\
-O ${prefix}.combined.g.vcf.gz \\
${args} \\
${input_files}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
END_VERSIONS
"""
}
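The -Xmx value above is derived from task.memory, falling back to 3 GB when no memory directive is set. A hedged sketch of overriding that from a pipeline or test config (the 8.GB / 2-cpu values are arbitrary examples, not part of this module):

process {
    withName: 'GATK4_COMBINEGVCFS' {
        // task.memory feeds avail_mem above, so this renders as --java-options "-Xmx8g"
        memory = 8.GB
        cpus   = 2
    }
}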

modules/gatk4/combinegvcfs/meta.yml

@@ -0,0 +1,54 @@
name: gatk4_combinegvcfs
description: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file
keywords:
- gvcf
- gatk4
- vcf
- combinegvcfs
- Short_Variant_Discovery
tools:
- gatk4:
description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs
tool_dev_url: https://github.com/broadinstitute/gatk
doi: 10.1158/1538-7445.AM2017-3590
licence: ['Apache-2.0']
input:
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: FASTA index file
pattern: "*.{fai}"
- dict:
type: file
description: FASTA dictionary file
pattern: "*.{dict}"
- vcf:
type: file
description: Compressed VCF files
pattern: "*.vcf.gz"
- vcf_idx:
type: file
description: VCF Index file
pattern: "*.{fai}"
output:
- gvcf:
type: file
description: Compressed Combined GVCF file
pattern: "*.combined.g.vcf.gz"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@sateeshperi"
- "@mjcipriano"
- "@hseabolt"

modules/gatk4/selectvariants/main.nf

@@ -0,0 +1,41 @@
process GATK4_SELECTVARIANTS {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.2.5.0--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(vcf_idx)
output:
tuple val(meta), path("*.selectvariants.vcf.gz") , emit: vcf
tuple val(meta), path("*.selectvariants.vcf.gz.tbi") , emit: tbi
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.toGiga()
}
"""
gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
-V $vcf \\
-O ${prefix}.selectvariants.vcf.gz \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
END_VERSIONS
"""
}
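Selection criteria are forwarded to SelectVariants verbatim through $args. A possible configuration sketch (the SNP filter is only an illustration, not part of this module):

process {
    withName: 'GATK4_SELECTVARIANTS' {
        // Hypothetical example: keep only SNPs in the emitted *.selectvariants.vcf.gz
        ext.args = '--select-type-to-include SNP'
    }
}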

modules/gatk4/selectvariants/meta.yml

@@ -0,0 +1,55 @@
name: gatk4_selectvariants
description: Select a subset of variants from a VCF file
keywords:
- gatk
- gatk4
- selectvariants
- vcf
tools:
- gatk4:
description: |
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants
tool_dev_url: https://github.com/broadinstitute/gatk
doi: 10.1158/1538-7445.AM2017-3590
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- vcf:
type: list
description: VCF(.gz) file
pattern: "*.{vcf,vcf.gz}"
- vcf_idx:
type: list
description: VCF file index
pattern: "*.{idx,tbi}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- vcf:
type: file
description: Compressed VCF file
pattern: "*.selectvariants.vcf.gz"
- vcf_tbi:
type: list
description: VCF file index
pattern: "*.{idx,tbi}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@mjcipriano"

modules/hmmer/hmmalign/main.nf

@@ -21,13 +21,11 @@ process HMMER_HMMALIGN {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def fastacmd = fasta.getExtension() == 'gz' ? "gunzip -c $fasta" : "cat $fasta"
     """
-    $fastacmd | \\
-        hmmalign \\
+    hmmalign \\
         $args \\
         $hmm \\
-        - | gzip -c > ${meta.id}.sthlm.gz
+        $fasta | gzip -c > ${meta.id}.sthlm.gz

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
modules/hmmer/hmmalign/meta.yml

@@ -19,12 +19,12 @@ input:
         e.g. [ id:'test' ]
   - fasta:
       type: file
-      description: Amino acid or nucleotide fasta file, gzipped or not
-      pattern: "*.{fna,fna.gz,faa,faa.gz,fasta,fasta.gz,fa,fa.gz}"
+      description: Amino acid or nucleotide gzipped compressed fasta file
+      pattern: "*.{fna.gz,faa.gz,fasta.gz,fa.gz}"
   - hmm:
       type: file
-      description: HMM file
-      pattern: "*.hmm"
+      description: A gzipped HMM file
+      pattern: "*.hmm.gz"

 output:
   - meta:
@@ -43,3 +43,4 @@ output:

 authors:
   - "@erikrikarddaniel"
+  - "@jfy133"

modules/hmmer/hmmsearch/main.nf

@@ -0,0 +1,51 @@
process HMMER_HMMSEARCH {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::hmmer=3.3.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' :
'quay.io/biocontainers/hmmer:3.3.2--h1b792b2_1' }"
input:
tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain)
output:
tuple val(meta), path('*.txt.gz') , emit: output
tuple val(meta), path('*.sto.gz') , emit: alignments , optional: true
tuple val(meta), path('*.tbl.gz') , emit: target_summary, optional: true
tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
output = "${prefix}.txt"
alignment = write_align ? "-A ${prefix}.sto" : ''
target_summary = write_target ? "--tblout ${prefix}.tbl" : ''
domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : ''
"""
hmmsearch \\
$args \\
--cpu $task.cpus \\
-o $output \\
$alignment \\
$target_summary \\
$domain_summary \\
$hmmfile \\
$seqdb
gzip --no-name *.txt \\
${write_align ? '*.sto' : ''} \\
${write_target ? '*.tbl' : ''} \\
${write_domain ? '*.domtbl' : ''}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//')
END_VERSIONS
"""
}

modules/hmmer/hmmsearch/meta.yml

@@ -0,0 +1,69 @@
name: hmmer_hmmsearch
description: search profile(s) against a sequence database
keywords:
- hidden Markov model
- HMM
- hmmer
- hmmsearch
tools:
- hmmer:
description: Biosequence analysis using profile hidden Markov models
homepage: http://hmmer.org/
documentation: http://hmmer.org/documentation.html
tool_dev_url: https://github.com/EddyRivasLab/hmmer
doi: "10.1371/journal.pcbi.1002195"
licence: ['BSD']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- hmmfile:
type: file
description: One or more HMM profiles created with hmmbuild
pattern: "*.{hmm,hmm.gz}"
- seqdb:
type: file
description: Database of sequences in FASTA format
pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}"
- write_align:
type: val
description: Flag to write optional alignment output. Specify with 'true' to output
- write_target:
type: val
description: Flag to write optional per target summary . Specify with 'true' to output
- write_domain:
type: val
description: Flag to write optional per domain summary. Specify with 'true' to output
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- output:
type: file
description: Human readable output summarizing hmmsearch results
pattern: "*.{txt.gz}"
- alignments:
type: file
description: Optional multiple sequence alignment (MSA) in Stockholm format
pattern: "*.{sto.gz}"
- target_summary:
type: file
description: Optional tabular (space-delimited) summary of per-target output
pattern: "*.{tbl.gz}"
- domain_summary:
type: file
description: Optional tabular (space-delimited) summary of per-domain output
pattern: "*.{domtbl.gz}"
authors:
- "@Midnighter"

modules/picard/cleansam/main.nf

@@ -1,6 +1,6 @@
 process PICARD_CLEANSAM {
     tag "$meta.id"
-    label 'process_low'
+    label 'process_medium'

     conda (params.enable_conda ? "bioconda::picard=2.26.9" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -8,10 +8,10 @@ process PICARD_CLEANSAM {
         'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }"

     input:
-    tuple val(meta), path(sam)
+    tuple val(meta), path(bam)

     output:
-    tuple val(meta), path("*.sam"), emit: sam
+    tuple val(meta), path("*.bam"), emit: bam
     path "versions.yml" , emit: versions

     when:
@@ -20,7 +20,6 @@ process PICARD_CLEANSAM {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def STRINGENCY = task.ext.stringency ?: "STRICT"
     def avail_mem = 3
     if (!task.memory) {
         log.info '[Picard CleanSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@@ -32,9 +31,8 @@ process PICARD_CLEANSAM {
         -Xmx${avail_mem}g \\
         CleanSam \\
         ${args} \\
-        -I ${sam} \\
-        -O ${prefix}.sam \\
-        --VALIDATION_STRINGENCY ${STRINGENCY}
+        -I ${bam} \\
+        -O ${prefix}.bam

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

modules/picard/cleansam/meta.yml

@@ -1,8 +1,7 @@
 name: picard_cleansam
-description: Cleans the provided SAM/BAM, soft-clipping beyond-end-of-reference alignments and setting MAPQ to 0 for unmapped reads
+description: Cleans the provided BAM, soft-clipping beyond-end-of-reference alignments and setting MAPQ to 0 for unmapped reads
 keywords:
   - clean
-  - sam
   - bam
 tools:
   - picard:
@@ -22,8 +21,8 @@ input:
         e.g. [ id:'test', single_end:false ]
   - sam:
       type: file
-      description: SAM file
-      pattern: "*.{sam}"
+      description: BAM file
+      pattern: "*.{bam}"

 output:
   - meta:
@@ -37,8 +36,8 @@ output:
       pattern: "versions.yml"
   - sam:
       type: file
-      description: Cleaned SAM file
-      pattern: "*.{sam}"
+      description: Cleaned BAM file
+      pattern: "*.{bam}"

 authors:
   - "@sateeshperi"

modules/seqkit/pair/main.nf

@@ -0,0 +1,40 @@
process SEQKIT_PAIR {
tag "$meta.id"
label 'process_medium'
conda (params.enable_conda ? "bioconda::seqkit=2.1.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0':
'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }"
input:
tuple val(meta), path(reads)
output:
tuple val(meta), path("*.paired.fastq.gz") , emit: reads
tuple val(meta), path("*.unpaired.fastq.gz"), optional: true, emit: unpaired_reads
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
seqkit \\
pair \\
-1 ${reads[0]} \\
-2 ${reads[1]} \\
$args \\
--threads $task.cpus
# gzip fastq
find . -maxdepth 1 -name "*.fastq" -exec gzip {} \\;
cat <<-END_VERSIONS > versions.yml
"${task.process}":
seqkit: \$( seqkit | sed '3!d; s/Version: //' )
END_VERSIONS
"""
}
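The unpaired_reads channel is optional and only filled when seqkit pair is told to keep singletons. A configuration sketch (same -u flag the module test further down uses):

process {
    withName: 'SEQKIT_PAIR' {
        // -u saves reads without a mate, populating the optional *.unpaired.fastq.gz output
        ext.args = '-u'
    }
}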

modules/seqkit/pair/meta.yml

@@ -0,0 +1,48 @@
name: seqkit_pair
description: match up paired-end reads from two fastq files
keywords:
- seqkit
- pair
tools:
- seqkit:
description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
homepage: https://bioinf.shenwei.me/seqkit/usage/
documentation: https://bioinf.shenwei.me/seqkit/usage/
tool_dev_url: https://github.com/shenwei356/seqkit/
doi: "10.1371/journal.pone.0163962"
licence: ['MIT']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input paired-end FastQ files.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Paired fastq reads
pattern: "*.paired.fastq.gz"
- unpaired_reads:
type: file
description: Unpaired reads (optional)
pattern: "*.unpaired.fastq.gz"
authors:
- "@sateeshperi"
- "@mjcipriano"
- "@hseabolt"

modules/seqtk/seq/main.nf

@@ -0,0 +1,40 @@
process SEQTK_SEQ {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::seqtk=1.3" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' :
'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }"
input:
tuple val(meta), path(fastx)
output:
tuple val(meta), path("*.gz") , emit: fastx
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def extension = "fastq"
if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) {
extension = "fasta"
}
"""
seqtk \\
seq \\
$args \\
$fastx | \\
gzip -c > ${prefix}.seqtk-seq.${extension}.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
END_VERSIONS
"""
}
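The extension check above switches the output name to *.fasta.gz either when the input is FASTA or when -a/-A is passed. A configuration sketch mirroring the module test further down, where -A turns a FASTQ input into ${prefix}.seqtk-seq.fasta.gz:

process {
    withName: 'SEQTK_SEQ' {
        // -A forces FASTA output, so the extension logic names the file *.seqtk-seq.fasta.gz
        ext.args = '-A'
    }
}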

modules/seqtk/seq/meta.yml

@@ -0,0 +1,42 @@
name: seqtk_seq
description: Common transformation operations on FASTA or FASTQ files.
keywords:
- seq
tools:
- seqtk:
description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk seq command enables common transformation operations on FASTA or FASTQ files.
homepage: https://github.com/lh3/seqtk
documentation: https://docs.csc.fi/apps/seqtk/
tool_dev_url: https://github.com/lh3/seqtk
licence: ['MIT']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- sequences:
type: file
description: A FASTQ or FASTA file
pattern: "*.{fastq.gz, fastq, fq, fq.gz, fasta, fastq.gz, fa, fa.gz, fas, fas.gz, fna, fna.gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- sequences:
type: file
description: FASTQ/FASTA file containing renamed sequences
pattern: "*.{fastq.gz, fasta.gz}"
authors:
- "@hseabolt"
- "@mjcipriano"
- "@sateeshperi"

modules/svdb/merge/main.nf

@@ -21,15 +21,19 @@ process SVDB_MERGE {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def input = ""
-    for (int index = 0; index < vcfs.size(); index++) {
-        input += " ${vcfs[index]}:${priority[index]}"
+    def input = "${vcfs.join(" ")}"
+    def prio = ""
+    if(priority) {
+        prio = "--priority ${priority.join(',')}"
+        for (int index = 0; index < vcfs.size(); index++) {
+            input += " ${vcfs[index]}:${priority[index]}"
+        }
     }

     """
     svdb \\
         --merge \\
         $args \\
-        --priority ${priority.join(',')} \\
+        $prio \\
         --vcf $input \\
         > ${prefix}_sv_merge.vcf
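For reference, when the priority list is empty the rewritten block renders a plain merge call (values illustrative only):

// Illustrative only: vcfs = [ 'a.vcf', 'b.vcf' ], priority = []
// =>  svdb --merge <args> --vcf a.vcf b.vcf > <prefix>_sv_merge.vcf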

tests/config/pytest_modules.yml

@@ -504,6 +504,10 @@ expansionhunter:
   - modules/expansionhunter/**
   - tests/modules/expansionhunter/**

+faqcs:
+  - modules/faqcs/**
+  - tests/modules/faqcs/**
+
 fargene:
   - modules/fargene/**
   - tests/modules/fargene/**
@@ -576,6 +580,10 @@ gatk4/calculatecontamination:
   - modules/gatk4/calculatecontamination/**
   - tests/modules/gatk4/calculatecontamination/**

+gatk4/combinegvcfs:
+  - modules/gatk4/combinegvcfs/**
+  - tests/modules/gatk4/combinegvcfs/**
+
 gatk4/createsequencedictionary:
   - modules/gatk4/createsequencedictionary/**
   - tests/modules/gatk4/createsequencedictionary/**
@@ -664,6 +672,10 @@ gatk4/samtofastq:
   - modules/gatk4/samtofastq/**
   - tests/modules/gatk4/samtofastq/**

+gatk4/selectvariants:
+  - modules/gatk4/selectvariants/**
+  - tests/modules/gatk4/selectvariants/**
+
 gatk4/splitncigarreads:
   - modules/gatk4/splitncigarreads/**
   - tests/modules/gatk4/splitncigarreads/**
@@ -775,6 +787,10 @@ hmmer/hmmalign:
   - modules/hmmer/hmmalign/**
   - tests/modules/hmmer/hmmalign/**

+hmmer/hmmsearch:
+  - modules/hmmer/hmmsearch/**
+  - tests/modules/hmmer/hmmsearch/**
+
 homer/annotatepeaks:
   - modules/homer/annotatepeaks/**
   - tests/modules/homer/annotatepeaks/**
@@ -1429,6 +1445,10 @@ seacr/callpeak:
   - modules/seacr/callpeak/**
   - tests/modules/seacr/callpeak/**

+seqkit/pair:
+  - modules/seqkit/pair/**
+  - tests/modules/seqkit/pair/**
+
 seqkit/split2:
   - modules/seqkit/split2/**
   - tests/modules/seqkit/split2/**
@@ -1449,6 +1469,10 @@ seqtk/sample:
   - modules/seqtk/sample/**
   - tests/modules/seqtk/sample/**

+seqtk/seq:
+  - modules/seqtk/seq/**
+  - tests/modules/seqtk/seq/**
+
 seqtk/subseq:
   - modules/seqtk/subseq/**
   - tests/modules/seqtk/subseq/**

tests/modules/faqcs/main.nf

@@ -0,0 +1,30 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FAQCS } from '../../../modules/faqcs/main.nf'
//
// Test with single-end data
//
workflow test_fastp_single_end {
input = [ [ id:'test', single_end:true ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
]
FAQCS ( input )
}
//
// Test with paired-end data
//
workflow test_fastp_paired_end {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
]
FAQCS ( input )
}

tests/modules/faqcs/nextflow.config

@@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}
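The publishDir closure shared by these test configs derives the output directory from the process name. A worked example with an illustrative task.process value (runnable Groovy):

// Hypothetical task.process value inside the single-end test:
def name = 'TEST_FASTP_SINGLE_END:FAQCS'
assert name.tokenize(':')[-1].tokenize('_')[0].toLowerCase() == 'faqcs'
// => results are published to "${params.outdir}/faqcs", matching the output/faqcs/ paths in test.yml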

tests/modules/faqcs/test.yml

@@ -0,0 +1,31 @@
- name: faqcs test_fastp_single_end
command: nextflow run tests/modules/faqcs -entry test_fastp_single_end -c tests/config/nextflow.config
tags:
- faqcs
files:
- path: output/faqcs/test.fastp.log
md5sum: be79dc893f87de1f82faf749cdfb848c
- path: output/faqcs/test.stats.txt
md5sum: ea20e93706b2e4c676004253baa3cec6
- path: output/faqcs/test.trimmed.fastq.gz
md5sum: 875863b402f67403dac63ef59b9c9a8a
- path: output/faqcs/test_qc_report.pdf
- path: output/faqcs/versions.yml
md5sum: 2a38d7e7ab5299336e9669c393c9da6c
- name: faqcs test_fastp_paired_end
command: nextflow run tests/modules/faqcs -entry test_fastp_paired_end -c tests/config/nextflow.config
tags:
- faqcs
files:
- path: output/faqcs/test.fastp.log
md5sum: be79dc893f87de1f82faf749cdfb848c
- path: output/faqcs/test.stats.txt
md5sum: 9a693f8af94ab8c485519d9a523aa622
- path: output/faqcs/test_1.trimmed.fastq.gz
md5sum: 875863b402f67403dac63ef59b9c9a8a
- path: output/faqcs/test_2.trimmed.fastq.gz
md5sum: 375aeb74819ca3d72203135ac80df78c
- path: output/faqcs/test_qc_report.pdf
- path: output/faqcs/versions.yml
md5sum: 208d54c0cf6dfc54e719b81b990afac9

tests/modules/gatk4/combinegvcfs/main.nf

@@ -0,0 +1,24 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_COMBINEGVCFS } from '../../../../modules/gatk4/combinegvcfs/main.nf'
workflow test_gatk4_combinegvcfs {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf'], checkIfExists: true) ],
[ file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_idx'], checkIfExists: true) ]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fasta_dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_COMBINEGVCFS ( input, fasta, fasta_fai, fasta_dict )
}

tests/modules/gatk4/combinegvcfs/nextflow.config

@@ -0,0 +1,6 @@
process {
ext.args = "--tmp-dir ."
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

tests/modules/gatk4/combinegvcfs/test.yml

@@ -0,0 +1,10 @@
- name: gatk4 combinegvcfs test_gatk4_combinegvcfs
command: nextflow run tests/modules/gatk4/combinegvcfs -entry test_gatk4_combinegvcfs -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/combinegvcfs
files:
- path: output/gatk4/test.combined.g.vcf.gz
contains: ['VCFv4.2']
- path: output/gatk4/versions.yml
md5sum: 49d9c467f84b6a99a4da3ef161af26bd

tests/modules/gatk4/selectvariants/main.nf

@@ -0,0 +1,29 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_SELECTVARIANTS } from '../../../../modules/gatk4/selectvariants/main.nf'
// Basic parameters with uncompressed VCF input
workflow test_gatk4_selectvariants_vcf_input {
input = [
[ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true)
]
GATK4_SELECTVARIANTS ( input)
}
// Basic parameters with compressed VCF input
workflow test_gatk4_selectvariants_gz_input {
input = [
[ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true)
]
GATK4_SELECTVARIANTS ( input )
}

tests/modules/gatk4/selectvariants/nextflow.config

@@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

tests/modules/gatk4/selectvariants/test.yml

@@ -0,0 +1,21 @@
- name: gatk4 selectvariants test_gatk4_selectvariants_vcf_input
command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_vcf_input -c tests/config/nextflow.config
tags:
- gatk4/selectvariants
- gatk4
files:
- path: output/gatk4/test.selectvariants.vcf.gz
- path: output/gatk4/test.selectvariants.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: a35d78af179f43652274bc7405d5a785
- name: gatk4 selectvariants test_gatk4_selectvariants_gz_input
command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_gz_input -c tests/config/nextflow.config
tags:
- gatk4/selectvariants
- gatk4
files:
- path: output/gatk4/test.selectvariants.vcf.gz
- path: output/gatk4/test.selectvariants.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: c943f3579a369968ca63444eb43fb6e7

tests/modules/hmmer/hmmalign/main.nf

@@ -8,10 +8,10 @@ workflow test_hmmer_hmmalign {
     input = [
         [ id:'test' ], // meta map
-        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/e_coli_k12_16s.fna') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
+        file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
     ]

-    hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/bac.16S_rRNA.hmm')
+    hmm = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz')

     HMMER_HMMALIGN ( input, hmm )
 }

tests/modules/hmmer/hmmsearch/main.nf

@@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { HMMER_HMMSEARCH } from '../../../../modules/hmmer/hmmsearch/main.nf'
workflow test_hmmer_hmmsearch {
input = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
false,
false,
false
]
HMMER_HMMSEARCH ( input )
}
workflow test_hmmer_hmmsearch_optional {
input = [
[ id:'test', single_end:false ], // meta map
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true),
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true),
true,
true,
true
]
HMMER_HMMSEARCH ( input )
}

tests/modules/hmmer/hmmsearch/nextflow.config

@@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

tests/modules/hmmer/hmmsearch/test.yml

@@ -0,0 +1,31 @@
- name: hmmer hmmsearch test_hmmer_hmmsearch
command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch -c tests/config/nextflow.config
tags:
- hmmer/hmmsearch
- hmmer
files:
- path: output/hmmer/test.txt.gz
contains:
- '[ok]'
- path: output/hmmer/versions.yml
md5sum: ed0808c10abd205c6bd0fb01f45259bb
- name: hmmer hmmsearch test_hmmer_hmmsearch_optional
command: nextflow run tests/modules/hmmer/hmmsearch -entry test_hmmer_hmmsearch_optional -c tests/config/nextflow.config
tags:
- hmmer/hmmsearch
- hmmer
files:
- path: output/hmmer/test.sto.gz
md5sum: d3121aa33455074c566fb7f8fdcda7b0
- path: output/hmmer/test.domtbl.gz
contains:
- '# [ok]'
- path: output/hmmer/test.tbl.gz
contains:
- '# [ok]'
- path: output/hmmer/test.txt.gz
contains:
- '[ok]'
- path: output/hmmer/versions.yml
md5sum: ebdcb08ae540e840f7b5c4c75a3a2993

tests/modules/picard/cleansam/test.yml

@@ -4,7 +4,7 @@
     - picard/cleansam
     - picard
   files:
-    - path: output/picard/test.sam
-      md5sum: e314171a6060eb79947c13ad126ddf00
+    - path: output/picard/test.bam
+      md5sum: a48f8e77a1480445efc57570c3a38a68
     - path: output/picard/versions.yml
       md5sum: e6457d7c6de51bf6f4b577eda65e57ac

tests/modules/seqkit/pair/main.nf

@@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQKIT_PAIR } from '../../../../modules/seqkit/pair/main.nf'
workflow test_seqkit_pair {
input = [
[ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
]
SEQKIT_PAIR ( input )
}

tests/modules/seqkit/pair/nextflow.config

@@ -0,0 +1,6 @@
process {
ext.args = "-u"
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

tests/modules/seqkit/pair/test.yml

@@ -0,0 +1,12 @@
- name: seqkit pair test_seqkit_pair
command: nextflow run tests/modules/seqkit/pair -entry test_seqkit_pair -c tests/config/nextflow.config
tags:
- seqkit/pair
- seqkit
files:
- path: output/seqkit/test_1.paired.fastq.gz
md5sum: fbfe7e8bdbc29abaaf58b6f1a32448e5
- path: output/seqkit/test_2.paired.fastq.gz
md5sum: 7d3c0912e5adc2674e8ecc1e647381b3
- path: output/seqkit/versions.yml
md5sum: 3086293bc986fc2ece38b1951d090819

tests/modules/seqtk/seq/main.nf

@@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQTK_SEQ } from '../../../../modules/seqtk/seq/main.nf'
workflow test_seqtk_seq {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
]
SEQTK_SEQ ( input )
}
workflow test_seqtk_seq_fq {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
]
SEQTK_SEQ ( input )
}

tests/modules/seqtk/seq/nextflow.config

@@ -0,0 +1,7 @@
process {
// Testing ext.args for passing arguments into seqtk seq
withName: 'SEQTK_SEQ' {
ext.args = '-A'
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}
}

tests/modules/seqtk/seq/test.yml

@@ -0,0 +1,21 @@
- name: seqtk seq test_seqtk_seq
command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq -c tests/config/nextflow.config
tags:
- seqtk/seq
- seqtk
files:
- path: output/seqtk/test.seqtk-seq.fasta.gz
md5sum: 50d73992c8c7e56dc095ef47ec52a754
- path: output/seqtk/versions.yml
md5sum: 2b89cd4a6e28f35fcfbbd2188384f944
- name: seqtk seq test_seqtk_seq_fq
command: nextflow run tests/modules/seqtk/seq -entry test_seqtk_seq_fq -c tests/config/nextflow.config
tags:
- seqtk/seq
- seqtk
files:
- path: output/seqtk/test.seqtk-seq.fasta.gz
md5sum: 2f009f1647971a97b4edec726a99dc1a
- path: output/seqtk/versions.yml
md5sum: 3467a76d3540bee8f58de050512bddaa

tests/modules/svdb/merge/main.nf

@@ -14,3 +14,13 @@ workflow test_svdb_merge {
     SVDB_MERGE ( input, priority )
 }
+
+workflow test_svdb_merge_noprio {
+    input = [ [ id:'test' ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ]
+            ]
+
+    SVDB_MERGE ( input, [] )
+}

tests/modules/svdb/merge/test.yml

@@ -5,3 +5,10 @@
     - svdb/merge
   files:
     - path: output/svdb/test_sv_merge.vcf
+
+- name: svdb merge noprio
+  command: nextflow run ./tests/modules/svdb/merge -entry test_svdb_merge_noprio -c ./tests/config/nextflow.config -c ./tests/modules/svdb/merge/nextflow.config
+  tags:
+    - svdb
+    - svdb/merge
+  files:
+    - path: output/svdb/test_sv_merge.vcf