Merge branch 'master' into new-module/EMBOSS_SEQRET

This commit is contained in:
Simon Pearce 2022-10-02 15:43:08 +01:00 committed by GitHub
commit 26d206cca3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 500 additions and 321 deletions

View file

@ -9,7 +9,7 @@ process BUSCO {
input: input:
tuple val(meta), path('tmp_input/*') tuple val(meta), path('tmp_input/*')
each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead
path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path busco_lineages_path // Recommended: path to busco lineages - downloads if not set
path config_file // Optional: busco configuration file path config_file // Optional: busco configuration file

View file

@ -14,7 +14,8 @@ process CHECKM_LINEAGEWF {
output: output:
tuple val(meta), path("${prefix}") , emit: checkm_output tuple val(meta), path("${prefix}") , emit: checkm_output
tuple val(meta), path("${prefix}.tsv"), emit: checkm_tsv tuple val(meta), path("${prefix}/lineage.ms"), emit: marker_file
tuple val(meta), path("${prefix}.tsv") , emit: checkm_tsv
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:

View file

@ -52,6 +52,10 @@ output:
type: directory type: directory
description: CheckM output directory description: CheckM output directory
pattern: "*/" pattern: "*/"
# Marker file written inside the CheckM output directory; the key matches
# the `marker_file` emit name of the process output.
- marker_file:
    type: file
    description: Lineage marker file
    pattern: "lineage.ms"
- checkm_tsv: - checkm_tsv:
type: file type: file
description: CheckM summary completeness statistics table description: CheckM summary completeness statistics table

44
modules/checkm/qa/main.nf Normal file
View file

@ -0,0 +1,44 @@
// Runs `checkm qa` on the results of a previous CheckM run.
//
// Inputs:
//   meta                - Groovy map with sample information (meta.id is used for tag/prefix)
//   analysis_dir        - CheckM output directory passed as the analysis directory argument
//   marker_file         - marker file passed as the first positional argument
//   coverage_file       - optional; when given it is passed via --coverage_file
//   exclude_marker_file - optional; when given it is passed via --exclude_markers
//
// Outputs (both optional, since only one of them is written per run):
//   output   - ${prefix}.txt   for every output format except 9
//   fasta    - ${prefix}.fasta when output format 9 is requested through ext.args
//   versions - versions.yml with the CheckM version
process CHECKM_QA {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::checkm-genome=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/checkm-genome:1.2.1--pyhdfd78af_0' :
        'quay.io/biocontainers/checkm-genome:1.2.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(analysis_dir), path(marker_file), path(coverage_file)
    path exclude_marker_file

    output:
    tuple val(meta), path("${prefix}.txt")  , optional: true, emit: output
    tuple val(meta), path("${prefix}.fasta"), optional: true, emit: fasta
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` and `suffix` are deliberately declared without `def` so that they
    // are visible to the output block above.
    prefix   = task.ext.prefix ?: "${meta.id}"
    // Output format 9 produces FASTA. The long form of `-o` is `--out_format`;
    // the previously checked `--out_file 9` spelling is still accepted so that
    // existing configurations keep resolving to the same suffix.
    suffix   = task.ext.args?.matches(".*-o 9.*|.*--out_format 9.*|.*--out_file 9.*") ? "fasta" : "txt"
    def coverage = coverage_file ? "--coverage_file ${coverage_file}" : ""
    // Must interpolate the staged exclude file itself; the former `marker_filer`
    // was an undefined name and failed as soon as an exclude file was supplied.
    def exclude = exclude_marker_file ? "--exclude_markers ${exclude_marker_file}" : ""
    """
    checkm \\
        qa \\
        --threads ${task.cpus} \\
        --file ${prefix}.${suffix} \\
        $marker_file \\
        $analysis_dir \\
        $coverage \\
        $exclude \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        checkm: \$( checkm 2>&1 | grep '...:::' | sed 's/.*CheckM v//;s/ .*//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,67 @@
name: checkm_qa
description: CheckM provides a set of tools for assessing the quality of genomes recovered from isolates, single cells, or metagenomes.
keywords:
- checkm
- mag
- metagenome
- quality
- isolates
- microbes
- single cells
- completeness
- contamination
- bins
- genome bins
- qa
- quality assurance
tools:
- checkm:
description: Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes.
homepage: https://ecogenomics.github.io/CheckM/
documentation: https://github.com/Ecogenomics/CheckM/wiki
tool_dev_url: https://github.com/Ecogenomics/CheckM
doi: "10.1101/gr.186072.114"
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- analysis_dir:
type: directory
description: Directory containing output of checkm/analyze or checkm/lineage_wf etc.
pattern: "*"
- marker_file:
type: file
description: Marker file specified during checkm/analyze or produced by checkm/{lineage,taxonomy}_wf
pattern: "*.ms"
- coverage_file:
type: file
description: File containing coverage of each sequence (generated by checkm coverage)
- exclude_marker_file:
type: file
description: File specifying markers to exclude from marker sets
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- output:
type: file
description: "Default completeness statistics in various formats, as specified with --out_format (excluding option: 9)"
pattern: "*.txt"
- fasta:
type: file
description: Output in fasta format (only if --out_format 9)
pattern: "*.fasta"
authors:
- "@jfy133"

View file

@ -0,0 +1,49 @@
// Runs the FCS adaptor screen (/app/fcs/bin/av_screen_x) on an assembly and
// republishes the files it writes under output/ with a sample-specific prefix.
//
// Input:  tuple of the sample meta map and the assembly file.
// Output: cleaned assembly (gzipped), adaptor report, log, pipeline arguments,
//         skipped-trims file, and versions.yml.
process FCS_FCSADAPTOR {
tag "$meta.id"
label 'process_low'
// No conda directive is declared for this process: abort early when conda is requested.
if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the FCS tool. Please use docker or singularity containers."
}
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.2.3/fcs-adaptor.0.2.3.sif':
'ncbi/fcs-adaptor:0.2.3' }"
input:
tuple val(meta), path(assembly)
output:
// Every glob below matches a file created by the gzip/cp commands in the script.
tuple val(meta), path("*.cleaned_sequences.fa.gz"), emit: cleaned_assembly
tuple val(meta), path("*.fcs_adaptor_report.txt") , emit: adaptor_report
tuple val(meta), path("*.fcs_adaptor.log") , emit: log
tuple val(meta), path("*.pipeline_args.yaml") , emit: pipeline_args
tuple val(meta), path("*.skipped_trims.jsonl") , emit: skipped_trims
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
// Falls back to '--prok' when no ext.args is configured.
def args = task.ext.args ?: '--prok' // --prok || --euk
def prefix = task.ext.prefix ?: "${meta.id}"
// Hard-coded version written to versions.yml; keep it in step with the container tags above.
def FCSADAPTOR_VERSION = '0.2.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
/app/fcs/bin/av_screen_x \\
-o output/ \\
$args \\
$assembly
# compress and/or rename files with prefix
gzip -cf output/cleaned_sequences/* > "${prefix}.cleaned_sequences.fa.gz"
cp "output/fcs_adaptor_report.txt" "${prefix}.fcs_adaptor_report.txt"
cp "output/fcs_adaptor.log" "${prefix}.fcs_adaptor.log"
cp "output/pipeline_args.yaml" "${prefix}.pipeline_args.yaml"
cp "output/skipped_trims.jsonl" "${prefix}.skipped_trims.jsonl"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
FCS-adaptor: $FCSADAPTOR_VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,62 @@
name: "fcs_fcsadaptor"
description: Run NCBI's FCS adaptor on assembled genomes
keywords:
- assembly
- genomics
- quality control
- contamination
- NCBI
tools:
- "fcs":
description: |
The Foreign Contamination Screening (FCS) tool rapidly detects contaminants from foreign
organisms in genome assemblies to prepare your data for submission. Therefore, the
submission process to NCBI is faster and fewer contaminated genomes are submitted.
This reduces errors in analyses and conclusions, not just for the original data submitter
but for all subsequent users of the assembly.
homepage: "https://www.ncbi.nlm.nih.gov/data-hub/cgr/data-quality-tools/"
documentation: "https://github.com/ncbi/fcs/wiki/FCS-adaptor"
tool_dev_url: "https://github.com/ncbi/fcs"
doi: ""
licence: "United States Government Work"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- assembly:
type: file
description: assembly fasta file
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- cleaned_assembly:
type: file
description: Cleaned assembly in fasta format
pattern: "*.{cleaned_sequences.fa.gz}"
- adaptor_report:
type: file
description: Report of identified adaptors
pattern: "*.{fcs_adaptor_report.txt}"
- log:
type: file
description: Log file
pattern: "*.{fcs_adaptor.log}"
- pipeline_args:
type: file
description: Run arguments
pattern: "*.{pipeline_args.yaml}"
- skipped_trims:
type: file
description: Skipped trim information
pattern: "*.{skipped_trims.jsonl}"
authors:
- "@d4straub"

View file

@ -3,10 +3,10 @@ process HISAT2_ALIGN {
label 'process_high' label 'process_high'
// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" : null) conda (params.enable_conda ? "bioconda::hisat2=2.2.1 bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:0e773bb207600fcb4d38202226eb20a33c7909b6-0' : 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' :
'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:0e773bb207600fcb4d38202226eb20a33c7909b6-0' }" 'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' }"
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
@ -25,7 +25,7 @@ process HISAT2_ALIGN {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def strandedness = '' def strandedness = ''
if (meta.strandedness == 'forward') { if (meta.strandedness == 'forward') {

View file

@ -44,7 +44,7 @@ process HISAT2_BUILD {
log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index."
log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
} }
def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
""" """
mkdir hisat2 mkdir hisat2
$extract_exons $extract_exons

View file

@ -20,7 +20,7 @@ process HISAT2_EXTRACTSPLICESITES {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
""" """
hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -11,8 +11,8 @@ process TABIX_BGZIP {
tuple val(meta), path(input) tuple val(meta), path(input)
output: output:
tuple val(meta), path("${prefix}*"), emit: output tuple val(meta), path("${output}") , emit: output
tuple val(meta), path("*gzi") , emit: gzi, optional: true tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -21,12 +21,13 @@ process TABIX_BGZIP {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}" prefix = task.ext.prefix ?: "${meta.id}"
in_bgzip = input.toString().endsWith(".gz") in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension())
output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz"
command1 = in_bgzip ? '-d' : '-c' command1 = in_bgzip ? '-d' : '-c'
command2 = in_bgzip ? '' : " > ${prefix}.${input.getExtension()}.gz" command2 = in_bgzip ? '' : " > ${output}"
// Name the index according to $prefix, unless a name has been requested // Name the index according to $prefix, unless a name has been requested
if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) {
args = args + " -I ${prefix}.${input.getExtension()}.gz.gzi" args = args + " -I ${output}.gzi"
} }
""" """
bgzip $command1 $args -@${task.cpus} $input $command2 bgzip $command1 $args -@${task.cpus} $input $command2

View file

@ -0,0 +1,49 @@
// Pipes a VCF through `vcfbreakmulti` and bgzip-compresses the result to
// ${prefix}.breakmulti.vcf.gz.
//
// Input:  tuple of the sample meta map, the VCF and its index. The index is
//         staged but never referenced by the command line.
// Output: the compressed VCF and versions.yml.
// ext.args is appended to vcfbreakmulti, ext.args2 to bgzip.
process VCFLIB_VCFBREAKMULTI {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::vcflib=1.0.3" : null)
// NOTE(review): the singularity and docker images use different build tags
// (hecb563c_1 vs ha025227_0) - confirm this is intended.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1':
'quay.io/biocontainers/vcflib:1.0.3--ha025227_0' }"
input:
tuple val(meta), path(vcf), path(tbi)
output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Hard-coded version written to versions.yml; keep it in step with the conda/container pins above.
def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
vcfbreakmulti \\
$vcf \\
$args \\
| bgzip -c $args2 > ${prefix}.breakmulti.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vcflib: $VERSION
END_VERSIONS
"""
// Stub: creates an empty output file with the same name as the real run, plus versions.yml.
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
touch ${prefix}.breakmulti.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vcflib: $VERSION
END_VERSIONS
"""
}

View file

@ -0,0 +1,45 @@
name: "vcflib_vcfbreakmulti"
description: If multiple alleles are specified in a single record, break the record into several lines preserving allele-specific INFO fields
keywords:
- vcflib
- vcflib/vcfbreakmulti
tools:
- "vcflib":
description: "Command-line tools for manipulating VCF files"
homepage: https://github.com/vcflib/vcflib
documentation: https://github.com/vcflib/vcflib#USAGE
doi: "10.1101/2021.05.21.445151"
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- vcf:
type: file
description: Compressed VCF file
pattern: "*.{vcf.gz,vcf}"
- tbi:
type: file
description: Index of VCF file
pattern: "*.vcf.gz.tbi"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- vcf:
type: file
description: Compressed VCF file
pattern: "*.vcf.gz"
authors:
- "@lucpen"

View file

@ -499,6 +499,10 @@ checkm/lineagewf:
- modules/checkm/lineagewf/** - modules/checkm/lineagewf/**
- tests/modules/checkm/lineagewf/** - tests/modules/checkm/lineagewf/**
checkm/qa:
- modules/checkm/qa/**
- tests/modules/checkm/qa/**
chromap/chromap: chromap/chromap:
- modules/chromap/chromap/** - modules/chromap/chromap/**
- tests/modules/chromap/chromap/** - tests/modules/chromap/chromap/**
@ -803,6 +807,10 @@ fasttree:
- modules/fasttree/** - modules/fasttree/**
- tests/modules/fasttree/** - tests/modules/fasttree/**
fcs/fcsadaptor:
- modules/fcs/fcsadaptor/**
- tests/modules/fcs/fcsadaptor/**
ffq: ffq:
- modules/ffq/** - modules/ffq/**
- tests/modules/ffq/** - tests/modules/ffq/**
@ -2500,6 +2508,10 @@ vcfanno:
- modules/vcfanno/** - modules/vcfanno/**
- tests/modules/vcfanno/** - tests/modules/vcfanno/**
vcflib/vcfbreakmulti:
- modules/vcflib/vcfbreakmulti/**
- tests/modules/vcflib/vcfbreakmulti/**
vcflib/vcfuniq: vcflib/vcfuniq:
- modules/vcflib/vcfuniq/** - modules/vcflib/vcfuniq/**
- tests/modules/vcflib/vcfuniq/** - tests/modules/vcflib/vcfuniq/**

View file

@ -13,58 +13,11 @@ workflow test_busco_genome_single_fasta {
BUSCO ( BUSCO (
input, input,
['bacteria_odb10', 'bacteroidetes_odb10'], // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues 'bacteria_odb10', // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
[], // Download busco lineage [], // Download busco lineage
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmpyz_hi62i/busco/
├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.txt
├── short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.json
├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.txt
├── test-bacteria_odb10-busco -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco/
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco.batch_summary.txt
├── test-bacteroidetes_odb10-busco -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco/
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteroidetes_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/versions.yml
*/
} }
workflow test_busco_genome_multi_fasta { workflow test_busco_genome_multi_fasta {
@ -84,49 +37,6 @@ workflow test_busco_genome_multi_fasta {
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmpk19byek7/busco/
├── short_summary.specific.bacteria_odb10.genome.fasta.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.json
├── short_summary.specific.bacteria_odb10.genome.fasta.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.txt
├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.json
├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.txt
├── test-bacteria_odb10-busco -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco/
│ ├── genome.fasta/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ ├── genome.fna/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── prodigal_err.log
│ │ │ └── prodigal_out.log
│ │ ├── prodigal_output/
│ │ │ └── predicted_genes/
│ │ └── run_bacteria_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/versions.yml
*/
} }
workflow test_busco_eukaryote_metaeuk { workflow test_busco_eukaryote_metaeuk {
@ -143,31 +53,6 @@ workflow test_busco_eukaryote_metaeuk {
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmpeq4dsir5/busco/
├── short_summary.specific.eukaryota_odb10.genome.fasta.json -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.json
├── short_summary.specific.eukaryota_odb10.genome.fasta.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.txt
├── test-eukaryota_odb10-busco -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco/
│ ├── genome.fasta/
│ │ ├── logs/
│ │ │ ├── hmmsearch_err.log
│ │ │ ├── hmmsearch_out.log
│ │ │ ├── metaeuk_err.log
│ │ │ └── metaeuk_out.log
│ │ └── run_eukaryota_odb10/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── metaeuk_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ └── short_summary.txt
│ └── logs/
│ └── busco.log
├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/versions.yml
*/
} }
workflow test_busco_eukaryote_augustus { workflow test_busco_eukaryote_augustus {
@ -184,34 +69,6 @@ workflow test_busco_eukaryote_augustus {
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmp2xqaygjj/busco/
├── test-eukaryota_odb10-busco -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco/
│ ├── genome.fasta/
│ │ ├── blast_db/
│ │ │ ├── genome.fasta.ndb
│ │ │ ├── genome.fasta.nhr
│ │ │ ├── genome.fasta.nin
│ │ │ ├── genome.fasta.not
│ │ │ ├── genome.fasta.nsq
│ │ │ ├── genome.fasta.ntf
│ │ │ └── genome.fasta.nto
│ │ ├── logs/
│ │ │ ├── makeblastdb_err.log
│ │ │ ├── makeblastdb_out.log
│ │ │ ├── tblastn_err.log
│ │ │ └── tblastn_out.log
│ │ └── run_eukaryota_odb10/
│ │ ├── augustus_output/
│ │ ├── blast_output/
│ │ ├── busco_sequences/
│ │ └── hmmer_output/
│ └── logs/
│ └── busco.log
├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/versions.yml
*/
} }
workflow test_busco_protein { workflow test_busco_protein {
@ -228,28 +85,8 @@ workflow test_busco_protein {
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmpzwd5dn56/busco/
├── short_summary.specific.bacteria_odb10.proteome.fasta.json -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.json
├── short_summary.specific.bacteria_odb10.proteome.fasta.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.txt
├── test-bacteria_odb10-busco -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco/
│ ├── logs/
│ │ └── busco.log
│ └── proteome.fasta/
│ ├── logs/
│ │ ├── hmmsearch_err.log
│ │ └── hmmsearch_out.log
│ └── run_bacteria_odb10/
│ ├── busco_sequences/
│ ├── full_table.tsv
│ ├── hmmer_output/
│ ├── missing_busco_list.tsv
│ ├── short_summary.json
│ └── short_summary.txt
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/versions.yml
*/
} }
workflow test_busco_transcriptome { workflow test_busco_transcriptome {
input = [ input = [
@ -264,63 +101,4 @@ workflow test_busco_transcriptome {
[] // No config [] // No config
) )
/* Output tree:
/tmp/tmpitjyvo9g/busco/
├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.json
├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt
├── test-bacteria_odb10-busco -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco/
│ ├── logs/
│ │ └── busco.log
│ └── test1.contigs.fa/
│ ├── blast_db/
│ │ ├── test1.contigs.fa.ndb
│ │ ├── test1.contigs.fa.nhr
│ │ ├── test1.contigs.fa.nin
│ │ ├── test1.contigs.fa.not
│ │ ├── test1.contigs.fa.nsq
│ │ ├── test1.contigs.fa.ntf
│ │ └── test1.contigs.fa.nto
│ ├── logs/
│ │ ├── hmmsearch_err.log
│ │ ├── hmmsearch_out.log
│ │ ├── makeblastdb_err.log
│ │ ├── makeblastdb_out.log
│ │ ├── tblastn_err.log
│ │ └── tblastn_out.log
│ ├── run_bacteria_odb10/
│ │ ├── blast_output/
│ │ ├── busco_sequences/
│ │ ├── full_table.tsv
│ │ ├── hmmer_output/
│ │ ├── missing_busco_list.tsv
│ │ ├── short_summary.json
│ │ ├── short_summary.txt
│ │ └── single_copy_proteins.faa
│ └── translated_proteins/
│ ├── 1024388at2.faa
│ ├── 1054741at2.faa
│ ├── 1093223at2.faa
│ ├── 1151822at2.faa
│ ├── 143460at2.faa
│ ├── 1491686at2.faa
│ ├── 1504821at2.faa
│ ├── 1574817at2.faa
│ ├── 1592033at2.faa
│ ├── 1623045at2.faa
│ ├── 1661836at2.faa
│ ├── 1674344at2.faa
│ ├── 1698718at2.faa
│ ├── 1990650at2.faa
│ ├── 223233at2.faa
│ ├── 402899at2.faa
│ ├── 505485at2.faa
│ ├── 665824at2.faa
│ ├── 776861at2.faa
│ ├── 874197at2.faa
│ ├── 932854at2.faa
│ └── 95696at2.faa
├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco.batch_summary.txt
└── versions.yml -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/versions.yml
*/
} }

View file

@ -16,19 +16,6 @@
- "Complete BUSCOs" - "Complete BUSCOs"
- "Missing BUSCOs" - "Missing BUSCOs"
- "Dependencies and versions" - "Dependencies and versions"
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json
contains:
- "one_line_summary"
- "mode"
- "dataset"
- path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt
contains:
- "BUSCO version"
- "The lineage dataset is"
- "BUSCO was run in mode"
- "Complete BUSCOs"
- "Missing BUSCOs"
- "Dependencies and versions"
- path: output/busco/test-bacteria_odb10-busco.batch_summary.txt - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa
- path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
@ -93,70 +80,6 @@
- "DEBUG:busco.run_BUSCO" - "DEBUG:busco.run_BUSCO"
- "Results from dataset" - "Results from dataset"
- "how to cite BUSCO" - "how to cite BUSCO"
- path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt
md5sum: 5ad8ecfbf4058b059e5edeb9170fc683
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_err.log
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_out.log
contains:
- "IUPAC"
- "genome scaffold"
- "Coverage"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_err.log
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_out.log
contains:
- "# hmmsearch :: search profile(s) against a sequence database"
- "# target sequence database:"
- "Internal pipeline statistics summary:"
- "[ok]"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_err.log
md5sum: 538510cfc7483498210f01e53fe035ad
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_out.log
md5sum: 61050b0706addc9498b2088a2d6efa9a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/.checkpoint
contains:
- "Tool: prodigal"
- "Completed"
- "jobs"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
md5sum: 836e9a80d33d8b89168f07ddc13ee991
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
md5sum: 20eeb75f86842e6e136f02bca8b73a9f
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
md5sum: 836e9a80d33d8b89168f07ddc13ee991
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
md5sum: 20eeb75f86842e6e136f02bca8b73a9f
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
md5sum: 538510cfc7483498210f01e53fe035ad
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
md5sum: 61050b0706addc9498b2088a2d6efa9a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/.bbtools_output/.checkpoint
contains:
- "Tool: bbtools"
- "Completed"
- "jobs"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/full_table.tsv
md5sum: 2360bb06f7171dafaa7ebf879789689b
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/hmmer_output.tar.gz
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/missing_busco_list.tsv
md5sum: e1fb12cf416448d35fbc5c508065963a
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.json
contains:
- "one_line_summary"
- "mode"
- "lineage_dataset"
- path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.txt
contains:
- "# BUSCO version is:"
- "Results:"
- "busco:"
- path: output/busco/test-bacteroidetes_odb10-busco/logs/busco.log
contains:
- "DEBUG:busco.run_BUSCO"
- "Results from dataset"
- "how to cite BUSCO"
- name: busco test_busco_genome_multi_fasta - name: busco test_busco_genome_multi_fasta
command: nextflow run ./tests/modules/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config command: nextflow run ./tests/modules/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config

View file

@ -0,0 +1,44 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CHECKM_LINEAGEWF } from '../../../../modules/checkm/lineagewf/main.nf'
include { CHECKM_LINEAGEWF as CHECKM_LINEAGEWF_FASTA } from '../../../../modules/checkm/lineagewf/main.nf'
include { CHECKM_QA } from '../../../../modules/checkm/qa/main.nf'
include { CHECKM_QA as CHECKM_QA_FASTA } from '../../../../modules/checkm/qa/main.nf'
// Test workflow: runs CHECKM_LINEAGEWF on the SARS-CoV-2 contigs FASTA and
// hands its output directory plus marker file to CHECKM_QA.
workflow test_checkm_qa {

    sample_meta   = [ id:'test', single_end:false ] // meta map
    contigs_fasta = file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
    lineage_input = [ sample_meta, contigs_fasta ]

    // Second argument is the FASTA file extension; the third is passed as an
    // empty list. NOTE(review): presumably an optional input -- confirm against
    // modules/checkm/lineagewf/main.nf.
    CHECKM_LINEAGEWF ( lineage_input, 'fasta', [] )

    // Pair each output directory with its marker file (join matches on the
    // leading meta element) and append an empty list as a fourth tuple element.
    lineage_dirs    = CHECKM_LINEAGEWF.out.checkm_output
    lineage_markers = CHECKM_LINEAGEWF.out.marker_file
    qa_input = lineage_dirs
        .join(lineage_markers)
        .map { sample, analysis_dir, marker_file -> [ sample, analysis_dir, marker_file, [] ] }

    // NOTE(review): the trailing [] fills CHECKM_QA's second input, presumably
    // optional -- confirm against modules/checkm/qa/main.nf.
    CHECKM_QA ( qa_input, [] )
}
// Test workflow: same chain as the plain checkm/qa test, but through the
// *_FASTA aliases so CHECKM_QA_FASTA can pick up its own ext.args from the
// test's nextflow.config.
workflow test_checkm_qa_fasta {

    sample_meta   = [ id:'test', single_end:false ] // meta map
    contigs_fasta = file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
    lineage_input = [ sample_meta, contigs_fasta ]

    // 'fasta' is the input file extension; the final [] is an empty placeholder.
    // NOTE(review): presumably an optional input -- confirm in the module.
    CHECKM_LINEAGEWF_FASTA ( lineage_input, 'fasta', [] )

    // Combine directory and marker outputs per sample (join matches on the
    // leading meta element), then add an empty fourth tuple element.
    lineage_dirs    = CHECKM_LINEAGEWF_FASTA.out.checkm_output
    lineage_markers = CHECKM_LINEAGEWF_FASTA.out.marker_file
    qa_input = lineage_dirs
        .join(lineage_markers)
        .map { sample, analysis_dir, marker_file -> [ sample, analysis_dir, marker_file, [] ] }

    CHECKM_QA_FASTA ( qa_input, [] )
}

View file

@ -0,0 +1,16 @@
// Process settings for the checkm/qa module tests.
process {

    // Publish into "<params.outdir>/<tool>", where <tool> is the lowercased text
    // before the first underscore of the process's simple name
    // (e.g. CHECKM_QA -> checkm).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    withName: 'CHECKM_QA' {
        ext.prefix = { "${meta.id}.qa" }
    }

    withName: 'CHECKM_QA_FASTA' {
        ext.prefix = { "${meta.id}.qa" }
        // NOTE(review): presumably selects a FASTA output format, matching the
        // `test.qa.fasta` file expected by the test spec -- confirm with CheckM docs.
        ext.args   = "-o 9"
    }
}

View file

@ -0,0 +1,16 @@
# CHECKM_QA without extra ext.args: test.qa.txt must match its checksum.
- name: checkm qa test_checkm_qa
  command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config
  tags:
    - checkm/qa
    - checkm
  files:
    - path: output/checkm/test.qa.txt
      # Quoted so the hash is always read as a string.
      md5sum: "645f4282569afb4b171396732b2d2582"
# CHECKM_QA_FASTA variant: only the presence of test.qa.fasta is checked
# (no checksum is pinned for it).
- name: checkm qa test_checkm_qa_fasta
  command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa_fasta -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config
  tags:
    - checkm/qa
    - checkm
  files:
    - path: output/checkm/test.qa.fasta

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { FCS_FCSADAPTOR } from '../../../../modules/fcs/fcsadaptor/main.nf'
// Test workflow: runs FCS_FCSADAPTOR on the gzipped Bacteroides fragilis
// genome FASTA from the shared test data.
workflow test_fcs_fcsadaptor {

    sample_meta = [ id:'test', single_end:false ] // meta map
    genome_fna  = file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)

    FCS_FCSADAPTOR ( [ sample_meta, genome_fna ] )
}

View file

@ -0,0 +1,5 @@
// Publish into "<params.outdir>/<tool>", where <tool> is the lowercased text
// before the first underscore of the process's simple name
// (e.g. FCS_FCSADAPTOR -> fcs).
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

View file

@ -0,0 +1,15 @@
# FCS_FCSADAPTOR on the test genome: checksums pin three outputs, the log is
# matched on substrings, and skipped_trims.jsonl only has to exist.
- name: fcs fcsadaptor test_fcs_fcsadaptor
  command: nextflow run ./tests/modules/fcs/fcsadaptor -entry test_fcs_fcsadaptor -c ./tests/config/nextflow.config -c ./tests/modules/fcs/fcsadaptor/nextflow.config
  tags:
    - fcs/fcsadaptor
    - fcs
  files:
    - path: output/fcs/test.cleaned_sequences.fa.gz
      md5sum: "2f331e59b352418420a35236091bf93c"
    - path: output/fcs/test.fcs_adaptor.log
      contains:
        - "Original command:"
        - "Executing:"
        - "INFO:cwltool:[workflow ] completed success"
    - path: output/fcs/test.fcs_adaptor_report.txt
      md5sum: "27375be4671e01d2d2674ddd1d44414a"
    - path: output/fcs/test.pipeline_args.yaml
      md5sum: "80effed2a75550a6a861c47f4c30449d"
    - path: output/fcs/test.skipped_trims.jsonl

View file

@ -7,7 +7,6 @@
- path: output/hisat2/test.hisat2.summary.log - path: output/hisat2/test.hisat2.summary.log
md5sum: 7b8a9e61b7646da1089b041333c41a87 md5sum: 7b8a9e61b7646da1089b041333c41a87
- path: output/hisat2/genome.splice_sites.txt - path: output/hisat2/genome.splice_sites.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/hisat2/test.bam - path: output/hisat2/test.bam
- path: output/hisat2/hisat2/genome.5.ht2 - path: output/hisat2/hisat2/genome.5.ht2
md5sum: 91198831aaba993acac1734138c5f173 md5sum: 91198831aaba993acac1734138c5f173
@ -35,7 +34,6 @@
- path: output/hisat2/test.hisat2.summary.log - path: output/hisat2/test.hisat2.summary.log
md5sum: 9839b31db795958cc4b70711a3414e9c md5sum: 9839b31db795958cc4b70711a3414e9c
- path: output/hisat2/genome.splice_sites.txt - path: output/hisat2/genome.splice_sites.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/hisat2/test.bam - path: output/hisat2/test.bam
- path: output/hisat2/hisat2/genome.5.ht2 - path: output/hisat2/hisat2/genome.5.ht2
md5sum: 91198831aaba993acac1734138c5f173 md5sum: 91198831aaba993acac1734138c5f173

View file

@ -5,7 +5,6 @@
- hisat2/build - hisat2/build
files: files:
- path: output/hisat2/genome.splice_sites.txt - path: output/hisat2/genome.splice_sites.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/hisat2/hisat2/genome.5.ht2 - path: output/hisat2/hisat2/genome.5.ht2
md5sum: 91198831aaba993acac1734138c5f173 md5sum: 91198831aaba993acac1734138c5f173
- path: output/hisat2/hisat2/genome.7.ht2 - path: output/hisat2/hisat2/genome.7.ht2

View file

@ -5,4 +5,3 @@
- hisat2/extractsplicesites - hisat2/extractsplicesites
files: files:
- path: output/hisat2/genome.splice_sites.txt - path: output/hisat2/genome.splice_sites.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e

View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { VCFLIB_VCFBREAKMULTI } from '../../../../modules/vcflib/vcfbreakmulti/main.nf'
// Test workflow: runs VCFLIB_VCFBREAKMULTI on the SARS-CoV-2 test VCF
// together with its .tbi index.
workflow test_vcflib_vcfbreakmulti {

    sample_meta = [ id:'test', single_end:false ] // meta map
    vcf_gz      = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)
    vcf_gz_tbi  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)

    VCFLIB_VCFBREAKMULTI ( [ sample_meta, vcf_gz, vcf_gz_tbi ] )
}

View file

@ -0,0 +1,5 @@
// Publish into "<params.outdir>/<tool>", where <tool> is the lowercased text
// before the first underscore of the process's simple name
// (e.g. VCFLIB_VCFBREAKMULTI -> vcflib).
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

View file

@ -0,0 +1,16 @@
# VCFLIB_VCFBREAKMULTI on the SARS-CoV-2 test VCF: the real run pins the output
# checksum; the stub run only requires the output file to exist.
#
# Both commands pass the shared test config and the module-level config
# explicitly, using "./"-prefixed paths like the other module test specs.
# NOTE(review): assumes tests/modules/vcflib/vcfbreakmulti/nextflow.config
# exists alongside this spec -- confirm.
- name: vcflib vcfbreakmulti
  command: nextflow run ./tests/modules/vcflib/vcfbreakmulti -entry test_vcflib_vcfbreakmulti -c ./tests/config/nextflow.config -c ./tests/modules/vcflib/vcfbreakmulti/nextflow.config
  tags:
    - vcflib/vcfbreakmulti
    - vcflib
  files:
    - path: output/vcflib/test.breakmulti.vcf.gz
      # Quoted so the hash is always read as a string.
      md5sum: "666d2d970399cf219dc1dc1eadff9170"

- name: vcflib vcfbreakmulti stub
  command: nextflow run ./tests/modules/vcflib/vcfbreakmulti -entry test_vcflib_vcfbreakmulti -c ./tests/config/nextflow.config -c ./tests/modules/vcflib/vcfbreakmulti/nextflow.config -stub
  tags:
    - vcflib/vcfbreakmulti
    - vcflib
  files:
    - path: output/vcflib/test.breakmulti.vcf.gz