From ebc466caacf58f81427c635bd25ed683ceaea01d Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Thu, 29 Sep 2022 22:35:51 +0200 Subject: [PATCH 1/6] Update busco input type (#2135) * Update busco input type * Fix singularity test --- modules/busco/main.nf | 2 +- tests/modules/busco/main.nf | 226 +---------------------------------- tests/modules/busco/test.yml | 77 ------------ 3 files changed, 3 insertions(+), 302 deletions(-) diff --git a/modules/busco/main.nf b/modules/busco/main.nf index 40354168..483768f1 100644 --- a/modules/busco/main.nf +++ b/modules/busco/main.nf @@ -9,7 +9,7 @@ process BUSCO { input: tuple val(meta), path('tmp_input/*') - each lineage // Required: lineage to check against, "auto" enables --auto-lineage instead + val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set path config_file // Optional: busco configuration file diff --git a/tests/modules/busco/main.nf b/tests/modules/busco/main.nf index 2fd4320c..2313bd06 100644 --- a/tests/modules/busco/main.nf +++ b/tests/modules/busco/main.nf @@ -13,58 +13,11 @@ workflow test_busco_genome_single_fasta { BUSCO ( input, - ['bacteria_odb10', 'bacteroidetes_odb10'], // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + 'bacteria_odb10', // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues [], // Download busco lineage [] // No config ) - /* Output tree: - /tmp/tmpyz_hi62i/busco/ - ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.json - ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/short_summary.specific.bacteria_odb10.genome.fna.txt - ├── 
short_summary.specific.bacteroidetes_odb10.genome.fna.json -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.json - ├── short_summary.specific.bacteroidetes_odb10.genome.fna.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/short_summary.specific.bacteroidetes_odb10.genome.fna.txt - ├── test-bacteria_odb10-busco -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco/ - │ ├── genome.fna/ - │ │ ├── logs/ - │ │ │ ├── hmmsearch_err.log - │ │ │ ├── hmmsearch_out.log - │ │ │ ├── prodigal_err.log - │ │ │ └── prodigal_out.log - │ │ ├── prodigal_output/ - │ │ │ └── predicted_genes/ - │ │ └── run_bacteria_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ └── logs/ - │ └── busco.log - ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/33/7d8c9b2c8931d9ad6a67aa843895e7/test-bacteria_odb10-busco.batch_summary.txt - ├── test-bacteroidetes_odb10-busco -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco/ - │ ├── genome.fna/ - │ │ ├── logs/ - │ │ │ ├── hmmsearch_err.log - │ │ │ ├── hmmsearch_out.log - │ │ │ ├── prodigal_err.log - │ │ │ └── prodigal_out.log - │ │ ├── prodigal_output/ - │ │ │ └── predicted_genes/ - │ │ └── run_bacteroidetes_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ └── logs/ - │ └── busco.log - ├── test-bacteroidetes_odb10-busco.batch_summary.txt -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/test-bacteroidetes_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmpza_0dth3/6a/e95a0cd21785ce33d63b8f73a68a51/versions.yml - */ - } workflow test_busco_genome_multi_fasta { @@ -84,49 +37,6 @@ workflow test_busco_genome_multi_fasta { [] // No config ) - /* Output 
tree: - /tmp/tmpk19byek7/busco/ - ├── short_summary.specific.bacteria_odb10.genome.fasta.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.json - ├── short_summary.specific.bacteria_odb10.genome.fasta.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fasta.txt - ├── short_summary.specific.bacteria_odb10.genome.fna.json -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.json - ├── short_summary.specific.bacteria_odb10.genome.fna.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/short_summary.specific.bacteria_odb10.genome.fna.txt - ├── test-bacteria_odb10-busco -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco/ - │ ├── genome.fasta/ - │ │ ├── logs/ - │ │ │ ├── hmmsearch_err.log - │ │ │ ├── hmmsearch_out.log - │ │ │ ├── prodigal_err.log - │ │ │ └── prodigal_out.log - │ │ ├── prodigal_output/ - │ │ │ └── predicted_genes/ - │ │ └── run_bacteria_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ ├── genome.fna/ - │ │ ├── logs/ - │ │ │ ├── hmmsearch_err.log - │ │ │ ├── hmmsearch_out.log - │ │ │ ├── prodigal_err.log - │ │ │ └── prodigal_out.log - │ │ ├── prodigal_output/ - │ │ │ └── predicted_genes/ - │ │ └── run_bacteria_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ └── logs/ - │ └── busco.log - ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/test-bacteria_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmplt9fv3tl/15/ff310a16d9ce7ad24e207a05ce718e/versions.yml - */ - } workflow test_busco_eukaryote_metaeuk { @@ -143,31 +53,6 @@ workflow 
test_busco_eukaryote_metaeuk { [] // No config ) - /* Output tree: - /tmp/tmpeq4dsir5/busco/ - ├── short_summary.specific.eukaryota_odb10.genome.fasta.json -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.json - ├── short_summary.specific.eukaryota_odb10.genome.fasta.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/short_summary.specific.eukaryota_odb10.genome.fasta.txt - ├── test-eukaryota_odb10-busco -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco/ - │ ├── genome.fasta/ - │ │ ├── logs/ - │ │ │ ├── hmmsearch_err.log - │ │ │ ├── hmmsearch_out.log - │ │ │ ├── metaeuk_err.log - │ │ │ └── metaeuk_out.log - │ │ └── run_eukaryota_odb10/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── metaeuk_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ └── short_summary.txt - │ └── logs/ - │ └── busco.log - ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/test-eukaryota_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmp60hby2pk/6f/529873d91cda6bae3a4a6a21746aee/versions.yml - */ - } workflow test_busco_eukaryote_augustus { @@ -184,34 +69,6 @@ workflow test_busco_eukaryote_augustus { [] // No config ) - /* Output tree: - /tmp/tmp2xqaygjj/busco/ - ├── test-eukaryota_odb10-busco -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco/ - │ ├── genome.fasta/ - │ │ ├── blast_db/ - │ │ │ ├── genome.fasta.ndb - │ │ │ ├── genome.fasta.nhr - │ │ │ ├── genome.fasta.nin - │ │ │ ├── genome.fasta.not - │ │ │ ├── genome.fasta.nsq - │ │ │ ├── genome.fasta.ntf - │ │ │ └── genome.fasta.nto - │ │ ├── logs/ - │ │ │ ├── makeblastdb_err.log - │ │ │ ├── makeblastdb_out.log - │ │ │ ├── tblastn_err.log - │ │ │ └── tblastn_out.log - │ │ └── run_eukaryota_odb10/ - │ │ ├── augustus_output/ - │ │ ├── blast_output/ - │ │ ├── busco_sequences/ - │ │ └── 
hmmer_output/ - │ └── logs/ - │ └── busco.log - ├── test-eukaryota_odb10-busco.batch_summary.txt -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/test-eukaryota_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmpjqs61x9o/3f/67cc14e873c0ceb45e2a27594d624c/versions.yml - */ - } workflow test_busco_protein { @@ -228,28 +85,8 @@ workflow test_busco_protein { [] // No config ) - /* Output tree: - /tmp/tmpzwd5dn56/busco/ - ├── short_summary.specific.bacteria_odb10.proteome.fasta.json -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.json - ├── short_summary.specific.bacteria_odb10.proteome.fasta.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/short_summary.specific.bacteria_odb10.proteome.fasta.txt - ├── test-bacteria_odb10-busco -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco/ - │ ├── logs/ - │ │ └── busco.log - │ └── proteome.fasta/ - │ ├── logs/ - │ │ ├── hmmsearch_err.log - │ │ └── hmmsearch_out.log - │ └── run_bacteria_odb10/ - │ ├── busco_sequences/ - │ ├── full_table.tsv - │ ├── hmmer_output/ - │ ├── missing_busco_list.tsv - │ ├── short_summary.json - │ └── short_summary.txt - ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/test-bacteria_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmpk1nlgbf_/ae/0db07b5cd08fb23d0aba5f134ebbe2/versions.yml - */ } + workflow test_busco_transcriptome { input = [ @@ -264,63 +101,4 @@ workflow test_busco_transcriptome { [] // No config ) - /* Output tree: - /tmp/tmpitjyvo9g/busco/ - ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.json -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.json - ├── short_summary.specific.bacteria_odb10.test1.contigs.fa.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt - ├── 
test-bacteria_odb10-busco -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco/ - │ ├── logs/ - │ │ └── busco.log - │ └── test1.contigs.fa/ - │ ├── blast_db/ - │ │ ├── test1.contigs.fa.ndb - │ │ ├── test1.contigs.fa.nhr - │ │ ├── test1.contigs.fa.nin - │ │ ├── test1.contigs.fa.not - │ │ ├── test1.contigs.fa.nsq - │ │ ├── test1.contigs.fa.ntf - │ │ └── test1.contigs.fa.nto - │ ├── logs/ - │ │ ├── hmmsearch_err.log - │ │ ├── hmmsearch_out.log - │ │ ├── makeblastdb_err.log - │ │ ├── makeblastdb_out.log - │ │ ├── tblastn_err.log - │ │ └── tblastn_out.log - │ ├── run_bacteria_odb10/ - │ │ ├── blast_output/ - │ │ ├── busco_sequences/ - │ │ ├── full_table.tsv - │ │ ├── hmmer_output/ - │ │ ├── missing_busco_list.tsv - │ │ ├── short_summary.json - │ │ ├── short_summary.txt - │ │ └── single_copy_proteins.faa - │ └── translated_proteins/ - │ ├── 1024388at2.faa - │ ├── 1054741at2.faa - │ ├── 1093223at2.faa - │ ├── 1151822at2.faa - │ ├── 143460at2.faa - │ ├── 1491686at2.faa - │ ├── 1504821at2.faa - │ ├── 1574817at2.faa - │ ├── 1592033at2.faa - │ ├── 1623045at2.faa - │ ├── 1661836at2.faa - │ ├── 1674344at2.faa - │ ├── 1698718at2.faa - │ ├── 1990650at2.faa - │ ├── 223233at2.faa - │ ├── 402899at2.faa - │ ├── 505485at2.faa - │ ├── 665824at2.faa - │ ├── 776861at2.faa - │ ├── 874197at2.faa - │ ├── 932854at2.faa - │ └── 95696at2.faa - ├── test-bacteria_odb10-busco.batch_summary.txt -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/test-bacteria_odb10-busco.batch_summary.txt - └── versions.yml -> /tmp/tmp6wqi0eyx/4f/ed0b23f0fc807bb68091298845c135/versions.yml - */ - } diff --git a/tests/modules/busco/test.yml b/tests/modules/busco/test.yml index 3c41284d..562f427f 100644 --- a/tests/modules/busco/test.yml +++ b/tests/modules/busco/test.yml @@ -16,19 +16,6 @@ - "Complete BUSCOs" - "Missing BUSCOs" - "Dependencies and versions" - - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.json - contains: - - "one_line_summary" - - "mode" - - 
"dataset" - - path: output/busco/short_summary.specific.bacteroidetes_odb10.genome.fna.txt - contains: - - "BUSCO version" - - "The lineage dataset is" - - "BUSCO was run in mode" - - "Complete BUSCOs" - - "Missing BUSCOs" - - "Dependencies and versions" - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log @@ -93,70 +80,6 @@ - "DEBUG:busco.run_BUSCO" - "Results from dataset" - "how to cite BUSCO" - - path: output/busco/test-bacteroidetes_odb10-busco.batch_summary.txt - md5sum: 5ad8ecfbf4058b059e5edeb9170fc683 - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_err.log - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/bbtools_out.log - contains: - - "IUPAC" - - "genome scaffold" - - "Coverage" - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_err.log - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/hmmsearch_out.log - contains: - - "# hmmsearch :: search profile(s) against a sequence database" - - "# target sequence database:" - - "Internal pipeline statistics summary:" - - "[ok]" - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/logs/prodigal_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/.checkpoint - contains: - - "Tool: prodigal" - - "Completed" - - "jobs" - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: 
output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa - md5sum: 836e9a80d33d8b89168f07ddc13ee991 - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna - md5sum: 20eeb75f86842e6e136f02bca8b73a9f - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log - md5sum: 538510cfc7483498210f01e53fe035ad - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log - md5sum: 61050b0706addc9498b2088a2d6efa9a - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/.bbtools_output/.checkpoint - contains: - - "Tool: bbtools" - - "Completed" - - "jobs" - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/fragmented_busco_sequences.tar.gz - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/busco_sequences/single_copy_busco_sequences.tar.gz - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/full_table.tsv - md5sum: 2360bb06f7171dafaa7ebf879789689b - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/hmmer_output.tar.gz - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/missing_busco_list.tsv - md5sum: e1fb12cf416448d35fbc5c508065963a - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.json - contains: - - "one_line_summary" - - "mode" - - "lineage_dataset" - - path: output/busco/test-bacteroidetes_odb10-busco/genome.fna/run_bacteroidetes_odb10/short_summary.txt - 
contains: - - "# BUSCO version is:" - - "Results:" - - "busco:" - - path: output/busco/test-bacteroidetes_odb10-busco/logs/busco.log - contains: - - "DEBUG:busco.run_BUSCO" - - "Results from dataset" - - "how to cite BUSCO" - name: busco test_busco_genome_multi_fasta command: nextflow run ./tests/modules/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config -c ./tests/modules/busco/nextflow.config From 31c0b49f6527ef196e89eca49a36af2de71711f8 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 30 Sep 2022 13:31:48 +0100 Subject: [PATCH 2/6] Explicitly name the output files to avoid the gzi index being flown through the `output` channel (#2136) --- modules/tabix/bgzip/main.nf | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/tabix/bgzip/main.nf b/modules/tabix/bgzip/main.nf index 0c527d77..aaef7859 100644 --- a/modules/tabix/bgzip/main.nf +++ b/modules/tabix/bgzip/main.nf @@ -11,9 +11,9 @@ process TABIX_BGZIP { tuple val(meta), path(input) output: - tuple val(meta), path("${prefix}*"), emit: output - tuple val(meta), path("*gzi") , emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,12 +21,13 @@ process TABIX_BGZIP { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = input.toString().endsWith(".gz") + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" command1 = in_bgzip ? '-d' : '-c' - command2 = in_bgzip ? '' : " > ${prefix}.${input.getExtension()}.gz" + command2 = in_bgzip ? 
'' : " > ${output}" // Name the index according to $prefix, unless a name has been requested if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { - args = args + " -I ${prefix}.${input.getExtension()}.gz.gzi" + args = args + " -I ${output}.gzi" } """ bgzip $command1 $args -@${task.cpus} $input $command2 From 03de60e6fa3b047da3433b5e624d8746af4464f5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 30 Sep 2022 15:50:47 +0200 Subject: [PATCH 3/6] Add CHECKM/QA (#2137) * Bump checkM version and add support for pre-defined database * Add markfile output to checkm/lineagewf and add checkm/qa * Remove unnecessary BAM from tests * Fix channels to ensure single meta * Blah * Use secondary name evwyhere * Remove second incovcatiojn * Don't run module directrly for join * And for the other test * Correct module name * Fix name again * Try and fix out_format 9 not being picked up * Contains to matches to allow regex * Fix tests (finally) * Fix verisoning * [automated] Fix linting with Prettier * Update modules/checkm/qa/main.nf Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Co-authored-by: nf-core-bot Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> --- modules/checkm/lineagewf/main.nf | 7 +-- modules/checkm/lineagewf/meta.yml | 4 ++ modules/checkm/qa/main.nf | 44 ++++++++++++++++ modules/checkm/qa/meta.yml | 67 +++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/checkm/qa/main.nf | 44 ++++++++++++++++ tests/modules/checkm/qa/nextflow.config | 16 ++++++ tests/modules/checkm/qa/test.yml | 16 ++++++ 8 files changed, 199 insertions(+), 3 deletions(-) create mode 100644 modules/checkm/qa/main.nf create mode 100644 modules/checkm/qa/meta.yml create mode 100644 tests/modules/checkm/qa/main.nf create mode 100644 tests/modules/checkm/qa/nextflow.config create mode 100644 tests/modules/checkm/qa/test.yml 
diff --git a/modules/checkm/lineagewf/main.nf b/modules/checkm/lineagewf/main.nf index 66e2e222..db0535e1 100644 --- a/modules/checkm/lineagewf/main.nf +++ b/modules/checkm/lineagewf/main.nf @@ -13,9 +13,10 @@ process CHECKM_LINEAGEWF { path db output: - tuple val(meta), path("${prefix}") , emit: checkm_output - tuple val(meta), path("${prefix}.tsv"), emit: checkm_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}") , emit: checkm_output + tuple val(meta), path("${prefix}/lineage.ms"), emit: marker_file + tuple val(meta), path("${prefix}.tsv") , emit: checkm_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/checkm/lineagewf/meta.yml b/modules/checkm/lineagewf/meta.yml index 6cfec189..4716a3e9 100644 --- a/modules/checkm/lineagewf/meta.yml +++ b/modules/checkm/lineagewf/meta.yml @@ -52,6 +52,10 @@ output: type: directory description: CheckM output directory pattern: "*/" + - marker_file: + type: file + description: Lineage marker file + pattern: "lineage.ms" - checkm_tsv: type: file description: CheckM summary completeness statistics table diff --git a/modules/checkm/qa/main.nf b/modules/checkm/qa/main.nf new file mode 100644 index 00000000..80b9747f --- /dev/null +++ b/modules/checkm/qa/main.nf @@ -0,0 +1,44 @@ +process CHECKM_QA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::checkm-genome=1.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/checkm-genome:1.2.1--pyhdfd78af_0' : + 'quay.io/biocontainers/checkm-genome:1.2.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(analysis_dir), path(marker_file), path(coverage_file) + path exclude_marker_file + + output: + tuple val(meta), path("${prefix}.txt") , optional: true, emit: output + tuple val(meta), path("${prefix}.fasta"), optional: true, emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.args?.matches(".*-o 9.*|.*--out_format 9.*|.*--out_file 9.*") ? "fasta" : "txt" + def coverage = coverage_file ? "--coverage_file ${coverage_file}" : "" + def exclude = exclude_marker_file ? "--exclude_markers ${exclude_marker_file}" : "" + """ + checkm \\ + qa \\ + --threads ${task.cpus} \\ + --file ${prefix}.${suffix} \\ + $marker_file \\ + $analysis_dir \\ + $coverage \\ + $exclude \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + checkm: \$( checkm 2>&1 | grep '...:::' | sed 's/.*CheckM v//;s/ .*//' ) + END_VERSIONS + """ +} diff --git a/modules/checkm/qa/meta.yml b/modules/checkm/qa/meta.yml new file mode 100644 index 00000000..d0af39af --- /dev/null +++ b/modules/checkm/qa/meta.yml @@ -0,0 +1,67 @@ +name: checkm_qa +description: CheckM provides a set of tools for assessing the quality of genomes recovered from isolates, single cells, or metagenomes. +keywords: + - checkm + - mag + - metagenome + - quality + - isolates + - microbes + - single cells + - completeness + - contamination + - bins + - genome bins + - qa + - quality assurance +tools: + - checkm: + description: Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes. 
+ homepage: https://ecogenomics.github.io/CheckM/ + documentation: https://github.com/Ecogenomics/CheckM/wiki + tool_dev_url: https://github.com/Ecogenomics/CheckM + doi: "10.1101/gr.186072.114" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - analysis_dir: + type: file + description: Directory containing output of checkm/analyze or checkm/lineage_wf etc. + pattern: "*" + - marker_file: + type: file + description: Marker file specified during checkm/analyze or produced by checkm/{lineage,taxonomy}_wf + pattern: "*.ms" + - coverage_file: + type: file + description: File containing coverage of each sequence (generated by checkm coverage) + - exclude_marker_file: + type: file + description: File specifying markers to exclude from marker sets + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: "Default completeness statistics in various formats, as specified with --out_format (excluding option: 9)" + pattern: "*.txt" + - fasta: + type: file + description: Output in fasta format (only if --out_format 9) + pattern: "*.fasta" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index cae10d10..d1fbe0d8 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -499,6 +499,10 @@ checkm/lineagewf: - modules/checkm/lineagewf/** - tests/modules/checkm/lineagewf/** +checkm/qa: + - modules/checkm/qa/** + - tests/modules/checkm/qa/** + chromap/chromap: - modules/chromap/chromap/** - tests/modules/chromap/chromap/** diff --git a/tests/modules/checkm/qa/main.nf b/tests/modules/checkm/qa/main.nf new file mode 100644 index 00000000..2be2dd67 --- /dev/null +++ 
b/tests/modules/checkm/qa/main.nf @@ -0,0 +1,44 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CHECKM_LINEAGEWF } from '../../../../modules/checkm/lineagewf/main.nf' +include { CHECKM_LINEAGEWF as CHECKM_LINEAGEWF_FASTA } from '../../../../modules/checkm/lineagewf/main.nf' +include { CHECKM_QA } from '../../../../modules/checkm/qa/main.nf' +include { CHECKM_QA as CHECKM_QA_FASTA } from '../../../../modules/checkm/qa/main.nf' + +workflow test_checkm_qa { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] + fasta_ext = 'fasta' + + CHECKM_LINEAGEWF ( input, fasta_ext, [] ) + + ch_checkmqa_input = CHECKM_LINEAGEWF.out.checkm_output + .join(CHECKM_LINEAGEWF.out.marker_file) + .map{ + meta, dir, marker -> + [ meta, dir, marker, []] + } + + CHECKM_QA ( ch_checkmqa_input, [] ) +} + +workflow test_checkm_qa_fasta { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] + fasta_ext = 'fasta' + + CHECKM_LINEAGEWF_FASTA ( input, fasta_ext, [] ) + + ch_checkmqa_input = CHECKM_LINEAGEWF_FASTA.out.checkm_output + .join(CHECKM_LINEAGEWF_FASTA.out.marker_file) + .map{ + meta, dir, marker -> + [ meta, dir, marker, []] + } + + CHECKM_QA_FASTA ( ch_checkmqa_input, [] ) +} diff --git a/tests/modules/checkm/qa/nextflow.config b/tests/modules/checkm/qa/nextflow.config new file mode 100644 index 00000000..0f5e524b --- /dev/null +++ b/tests/modules/checkm/qa/nextflow.config @@ -0,0 +1,16 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: CHECKM_QA { + ext.prefix = { "${meta.id}.qa" } + } + + + withName: CHECKM_QA_FASTA { + ext.args = "-o 9" + ext.prefix = { "${meta.id}.qa" } + } + + +} diff --git a/tests/modules/checkm/qa/test.yml b/tests/modules/checkm/qa/test.yml new file mode 100644 index 
00000000..0c3fa4c8 --- /dev/null +++ b/tests/modules/checkm/qa/test.yml @@ -0,0 +1,16 @@ +- name: checkm qa test_checkm_qa + command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config + tags: + - checkm/qa + - checkm + files: + - path: output/checkm/test.qa.txt + md5sum: 645f4282569afb4b171396732b2d2582 + +- name: checkm qa test_checkm_qa_fasta + command: nextflow run ./tests/modules/checkm/qa -entry test_checkm_qa_fasta -c ./tests/config/nextflow.config -c ./tests/modules/checkm/qa/nextflow.config + tags: + - checkm/qa + - checkm + files: + - path: output/checkm/test.qa.fasta From ef585046a04ed2ef9fb881a59dd43d45046e08d8 Mon Sep 17 00:00:00 2001 From: Lucpen Date: Fri, 30 Sep 2022 16:57:18 +0200 Subject: [PATCH 4/6] feat added vcflib/vcfbreakmulti module (#2134) * feat added vcflib/vcfbreakmulti module * fix comments review * Update modules/vcflib/vcfbreakmulti/main.nf Co-authored-by: Anders Jemt * Update modules/vcflib/vcfbreakmulti/main.nf Co-authored-by: Anders Jemt * fix change required by reviewer * fix commiting pytest_modules.yml * fix version * fix version_ * Update tests/modules/vcflib/vcfbreakmulti/test.yml Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> * fix undid previous commit * fix test * Update tests/modules/vcflib/vcfbreakmulti/test.yml Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> * Update tests/modules/vcflib/vcfbreakmulti/test.yml Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Co-authored-by: Anders Jemt Co-authored-by: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> --- modules/vcflib/vcfbreakmulti/main.nf | 49 +++++++++++++++++++ modules/vcflib/vcfbreakmulti/meta.yml | 45 +++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/vcflib/vcfbreakmulti/main.nf | 16 ++++++ .../vcflib/vcfbreakmulti/nextflow.config | 5 ++ tests/modules/vcflib/vcfbreakmulti/test.yml | 
16 ++++++ 6 files changed, 135 insertions(+) create mode 100644 modules/vcflib/vcfbreakmulti/main.nf create mode 100644 modules/vcflib/vcfbreakmulti/meta.yml create mode 100644 tests/modules/vcflib/vcfbreakmulti/main.nf create mode 100644 tests/modules/vcflib/vcfbreakmulti/nextflow.config create mode 100644 tests/modules/vcflib/vcfbreakmulti/test.yml diff --git a/modules/vcflib/vcfbreakmulti/main.nf b/modules/vcflib/vcfbreakmulti/main.nf new file mode 100644 index 00000000..0eb9cd7b --- /dev/null +++ b/modules/vcflib/vcfbreakmulti/main.nf @@ -0,0 +1,49 @@ +process VCFLIB_VCFBREAKMULTI { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::vcflib=1.0.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'quay.io/biocontainers/vcflib:1.0.3--ha025227_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + vcfbreakmulti \\ + $vcf \\ + $args \\ + | bgzip -c $args2 > ${prefix}.breakmulti.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}.breakmulti.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ +} diff --git a/modules/vcflib/vcfbreakmulti/meta.yml b/modules/vcflib/vcfbreakmulti/meta.yml new file mode 100644 index 00000000..1f2835e6 --- /dev/null +++ b/modules/vcflib/vcfbreakmulti/meta.yml @@ -0,0 +1,45 @@ +name: "vcflib_vcfbreakmulti" +description: If multiple alleles are specified in a single record, break the record into several lines preserving allele-specific INFO fields +keywords: + - vcflib + - vcflib/vcfbreakmulti +tools: + - "vcflib": + description: "Command-line tools for manipulating VCF files" + homepage: https://github.com/vcflib/vcflib + documentation: https://github.com/vcflib/vcflib#USAGE + doi: "https://doi.org/10.1101/2021.05.21.445151" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.{vcf.gz,vcf}" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + +authors: + - "@lucpen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d1fbe0d8..bfc14a03 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2500,6 +2500,10 @@ vcfanno: - modules/vcfanno/** - tests/modules/vcfanno/** +vcflib/vcfbreakmulti: + - modules/vcflib/vcfbreakmulti/** + - tests/modules/vcflib/vcfbreakmulti/** + vcflib/vcfuniq: - modules/vcflib/vcfuniq/** - tests/modules/vcflib/vcfuniq/** diff --git a/tests/modules/vcflib/vcfbreakmulti/main.nf b/tests/modules/vcflib/vcfbreakmulti/main.nf new file mode 100644 index 00000000..7e973300 --- /dev/null +++ b/tests/modules/vcflib/vcfbreakmulti/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { VCFLIB_VCFBREAKMULTI } from '../../../../modules/vcflib/vcfbreakmulti/main.nf' + +workflow test_vcflib_vcfbreakmulti { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + + VCFLIB_VCFBREAKMULTI ( input ) +} diff --git a/tests/modules/vcflib/vcfbreakmulti/nextflow.config b/tests/modules/vcflib/vcfbreakmulti/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/vcflib/vcfbreakmulti/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/vcflib/vcfbreakmulti/test.yml b/tests/modules/vcflib/vcfbreakmulti/test.yml new file mode 100644 index 00000000..d8ed82db --- /dev/null +++ b/tests/modules/vcflib/vcfbreakmulti/test.yml @@ -0,0 
+1,16 @@ +- name: vcflib vcfbreakmulti + command: nextflow run ./tests/modules/vcflib/vcfbreakmulti -entry test_vcflib_vcfbreakmulti -c tests/config/nextflow.config + tags: + - vcflib/vcfbreakmulti + - vcflib + files: + - path: output/vcflib/test.breakmulti.vcf.gz + md5sum: 666d2d970399cf219dc1dc1eadff9170 + +- name: vcflib vcfbreakmulti stub + command: nextflow run ./tests/modules/vcflib/vcfbreakmulti -entry test_vcflib_vcfbreakmulti -c tests/config/nextflow.config -stub + tags: + - vcflib/vcfbreakmulti + - vcflib + files: + - path: output/vcflib/test.breakmulti.vcf.gz From 4eed099a12a7d586ce830a662a33a9df5bd67a24 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 30 Sep 2022 20:34:06 +0100 Subject: [PATCH 5/6] Bump HISAT2 version to 2.2.1 in all modules (#2139) * Fix broken Quast module and bump MultiQC version in dumpsoftwareversions * Bump HISAT2 version to 2.2.1 * Remove empty md5sums * Remove empty md5sums * Remove empty md5sums --- modules/hisat2/align/main.nf | 8 ++++---- modules/hisat2/build/main.nf | 2 +- modules/hisat2/extractsplicesites/main.nf | 2 +- tests/modules/hisat2/align/test.yml | 2 -- tests/modules/hisat2/build_test/test.yml | 1 - tests/modules/hisat2/extractsplicesites/test.yml | 1 - 6 files changed, 6 insertions(+), 10 deletions(-) diff --git a/modules/hisat2/align/main.nf b/modules/hisat2/align/main.nf index 8b6eb331..a6f81a6c 100644 --- a/modules/hisat2/align/main.nf +++ b/modules/hisat2/align/main.nf @@ -3,10 +3,10 @@ process HISAT2_ALIGN { label 'process_high' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::hisat2=2.2.0 bioconda::samtools=1.15.1" : null) + conda (params.enable_conda ? "bioconda::hisat2=2.2.1 bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:0e773bb207600fcb4d38202226eb20a33c7909b6-0' : - 'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:0e773bb207600fcb4d38202226eb20a33c7909b6-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' : + 'quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' }" input: tuple val(meta), path(reads) @@ -25,7 +25,7 @@ process HISAT2_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. def strandedness = '' if (meta.strandedness == 'forward') { diff --git a/modules/hisat2/build/main.nf b/modules/hisat2/build/main.nf index 1b36c6b8..fee4064c 100644 --- a/modules/hisat2/build/main.nf +++ b/modules/hisat2/build/main.nf @@ -44,7 +44,7 @@ process HISAT2_BUILD { log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." } - def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" mkdir hisat2 $extract_exons diff --git a/modules/hisat2/extractsplicesites/main.nf b/modules/hisat2/extractsplicesites/main.nf index d644bcc5..1423f25c 100644 --- a/modules/hisat2/extractsplicesites/main.nf +++ b/modules/hisat2/extractsplicesites/main.nf @@ -20,7 +20,7 @@ process HISAT2_EXTRACTSPLICESITES { script: def args = task.ext.args ?: '' - def VERSION = '2.2.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ hisat2_extract_splice_sites.py $gtf > ${gtf.baseName}.splice_sites.txt cat <<-END_VERSIONS > versions.yml diff --git a/tests/modules/hisat2/align/test.yml b/tests/modules/hisat2/align/test.yml index 54e263bc..a99b2f94 100644 --- a/tests/modules/hisat2/align/test.yml +++ b/tests/modules/hisat2/align/test.yml @@ -7,7 +7,6 @@ - path: output/hisat2/test.hisat2.summary.log md5sum: 7b8a9e61b7646da1089b041333c41a87 - path: output/hisat2/genome.splice_sites.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/hisat2/test.bam - path: output/hisat2/hisat2/genome.5.ht2 md5sum: 91198831aaba993acac1734138c5f173 @@ -35,7 +34,6 @@ - path: output/hisat2/test.hisat2.summary.log md5sum: 9839b31db795958cc4b70711a3414e9c - path: output/hisat2/genome.splice_sites.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/hisat2/test.bam - path: output/hisat2/hisat2/genome.5.ht2 md5sum: 91198831aaba993acac1734138c5f173 diff --git a/tests/modules/hisat2/build_test/test.yml b/tests/modules/hisat2/build_test/test.yml index da5a450c..34c036d0 100644 --- a/tests/modules/hisat2/build_test/test.yml +++ b/tests/modules/hisat2/build_test/test.yml @@ -5,7 +5,6 @@ - hisat2/build files: - path: output/hisat2/genome.splice_sites.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/hisat2/hisat2/genome.5.ht2 md5sum: 91198831aaba993acac1734138c5f173 - 
path: output/hisat2/hisat2/genome.7.ht2 diff --git a/tests/modules/hisat2/extractsplicesites/test.yml b/tests/modules/hisat2/extractsplicesites/test.yml index a528199c..2571af94 100644 --- a/tests/modules/hisat2/extractsplicesites/test.yml +++ b/tests/modules/hisat2/extractsplicesites/test.yml @@ -5,4 +5,3 @@ - hisat2/extractsplicesites files: - path: output/hisat2/genome.splice_sites.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e From c9889866a921081c70cdda3a28623e57537f6dbe Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Fri, 30 Sep 2022 22:39:55 +0200 Subject: [PATCH 6/6] add FCS adaptor (#2033) * add FCS adaptor * run prettier * fix EClint * add keywords to meta * fix docker Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> --- modules/fcs/fcsadaptor/main.nf | 49 ++++++++++++++++ modules/fcs/fcsadaptor/meta.yml | 62 ++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/fcs/fcsadaptor/main.nf | 15 +++++ tests/modules/fcs/fcsadaptor/nextflow.config | 5 ++ tests/modules/fcs/fcsadaptor/test.yml | 15 +++++ 6 files changed, 150 insertions(+) create mode 100644 modules/fcs/fcsadaptor/main.nf create mode 100644 modules/fcs/fcsadaptor/meta.yml create mode 100644 tests/modules/fcs/fcsadaptor/main.nf create mode 100644 tests/modules/fcs/fcsadaptor/nextflow.config create mode 100644 tests/modules/fcs/fcsadaptor/test.yml diff --git a/modules/fcs/fcsadaptor/main.nf b/modules/fcs/fcsadaptor/main.nf new file mode 100644 index 00000000..2a0afb5b --- /dev/null +++ b/modules/fcs/fcsadaptor/main.nf @@ -0,0 +1,49 @@ +process FCS_FCSADAPTOR { + tag "$meta.id" + label 'process_low' + + if (params.enable_conda) { + exit 1, "Conda environments cannot be used when using the FCS tool. Please use docker or singularity containers." + } + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.2.3/fcs-adaptor.0.2.3.sif': + 'ncbi/fcs-adaptor:0.2.3' }" + + input: + tuple val(meta), path(assembly) + + output: + tuple val(meta), path("*.cleaned_sequences.fa.gz"), emit: cleaned_assembly + tuple val(meta), path("*.fcs_adaptor_report.txt") , emit: adaptor_report + tuple val(meta), path("*.fcs_adaptor.log") , emit: log + tuple val(meta), path("*.pipeline_args.yaml") , emit: pipeline_args + tuple val(meta), path("*.skipped_trims.jsonl") , emit: skipped_trims + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--prok' // --prok || --euk + def prefix = task.ext.prefix ?: "${meta.id}" + def FCSADAPTOR_VERSION = '0.2.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + /app/fcs/bin/av_screen_x \\ + -o output/ \\ + $args \\ + $assembly + + # compress and/or rename files with prefix + gzip -cf output/cleaned_sequences/* > "${prefix}.cleaned_sequences.fa.gz" + cp "output/fcs_adaptor_report.txt" "${prefix}.fcs_adaptor_report.txt" + cp "output/fcs_adaptor.log" "${prefix}.fcs_adaptor.log" + cp "output/pipeline_args.yaml" "${prefix}.pipeline_args.yaml" + cp "output/skipped_trims.jsonl" "${prefix}.skipped_trims.jsonl" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + FCS-adaptor: $FCSADAPTOR_VERSION + END_VERSIONS + """ +} diff --git a/modules/fcs/fcsadaptor/meta.yml b/modules/fcs/fcsadaptor/meta.yml new file mode 100644 index 00000000..e6fb1662 --- /dev/null +++ b/modules/fcs/fcsadaptor/meta.yml @@ -0,0 +1,62 @@ +name: "fcs_fcsadaptor" +description: Run NCBI's FCS adaptor on assembled genomes +keywords: + - assembly + - genomics + - quality control + - contamination + - NCBI +tools: + - "fcs": + description: | + The Foreign Contamination 
Screening (FCS) tool rapidly detects contaminants from foreign + organisms in genome assemblies to prepare your data for submission. Therefore, the + submission process to NCBI is faster and fewer contaminated genomes are submitted. + This reduces errors in analyses and conclusions, not just for the original data submitter + but for all subsequent users of the assembly. + homepage: "https://www.ncbi.nlm.nih.gov/data-hub/cgr/data-quality-tools/" + documentation: "https://github.com/ncbi/fcs/wiki/FCS-adaptor" + tool_dev_url: "https://github.com/ncbi/fcs" + doi: "" + licence: "United States Government Work" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: assembly fasta file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - cleaned_assembly: + type: file + description: Cleaned assembly in fasta format + pattern: "*.{cleaned_sequences.fa.gz}" + - adaptor_report: + type: file + description: Report of identified adaptors + pattern: "*.{fcs_adaptor_report.txt}" + - log: + type: file + description: Log file + pattern: "*.{fcs_adaptor.log}" + - pipeline_args: + type: file + description: Run arguments + pattern: "*.{pipeline_args.yaml}" + - skipped_trims: + type: file + description: Skipped trim information + pattern: "*.{skipped_trims.jsonl}" +authors: + - "@d4straub" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index bfc14a03..50512097 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -803,6 +803,10 @@ fasttree: - modules/fasttree/** - tests/modules/fasttree/** +fcs/fcsadaptor: + - modules/fcs/fcsadaptor/** + - tests/modules/fcs/fcsadaptor/** + ffq: - modules/ffq/** - tests/modules/ffq/** diff --git 
a/tests/modules/fcs/fcsadaptor/main.nf b/tests/modules/fcs/fcsadaptor/main.nf new file mode 100644 index 00000000..f86dd430 --- /dev/null +++ b/tests/modules/fcs/fcsadaptor/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { FCS_FCSADAPTOR } from '../../../../modules/fcs/fcsadaptor/main.nf' + +workflow test_fcs_fcsadaptor { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + FCS_FCSADAPTOR ( input ) +} diff --git a/tests/modules/fcs/fcsadaptor/nextflow.config b/tests/modules/fcs/fcsadaptor/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/fcs/fcsadaptor/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/fcs/fcsadaptor/test.yml b/tests/modules/fcs/fcsadaptor/test.yml new file mode 100644 index 00000000..85d931a9 --- /dev/null +++ b/tests/modules/fcs/fcsadaptor/test.yml @@ -0,0 +1,15 @@ +- name: fcs fcsadaptor test_fcs_fcsadaptor + command: nextflow run ./tests/modules/fcs/fcsadaptor -entry test_fcs_fcsadaptor -c ./tests/config/nextflow.config -c ./tests/modules/fcs/fcsadaptor/nextflow.config + tags: + - fcs/fcsadaptor + - fcs + files: + - path: output/fcs/test.cleaned_sequences.fa.gz + md5sum: 2f331e59b352418420a35236091bf93c + - path: output/fcs/test.fcs_adaptor.log + contains: ["Original command:", "Executing:", "INFO:cwltool:[workflow ] completed success"] + - path: output/fcs/test.fcs_adaptor_report.txt + md5sum: 27375be4671e01d2d2674ddd1d44414a + - path: output/fcs/test.pipeline_args.yaml + md5sum: 80effed2a75550a6a861c47f4c30449d + - path: output/fcs/test.skipped_trims.jsonl