From d0a1cbb703a130c19f6796c3fce24fbe7dfce789 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 18 Feb 2022 15:38:05 +0100 Subject: [PATCH 01/15] fix fastp to allow exporting of only `*.merged.fastq.gz` (#1325) * fix: remove left-over unnecessary code * Update main.nf * Update meta.yml --- modules/fastp/main.nf | 2 +- modules/fastp/meta.yml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/fastp/main.nf b/modules/fastp/main.nf index d8218350..5c9e3b83 100644 --- a/modules/fastp/main.nf +++ b/modules/fastp/main.nf @@ -13,7 +13,7 @@ process FASTP { val save_merged output: - tuple val(meta), path('*.trim.fastq.gz') , emit: reads + tuple val(meta), path('*.trim.fastq.gz') , optional:true, emit: reads tuple val(meta), path('*.json') , emit: json tuple val(meta), path('*.html') , emit: html tuple val(meta), path('*.log') , emit: log diff --git a/modules/fastp/meta.yml b/modules/fastp/meta.yml index f53bb09f..3274e41b 100644 --- a/modules/fastp/meta.yml +++ b/modules/fastp/meta.yml @@ -22,6 +22,12 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` output: - meta: From cab399507bea60d90de6d7b296163210c371b693 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 18 Feb 2022 17:36:09 +0000 Subject: [PATCH 02/15] Patch fix artic/minion and ivar/variants modules (#1326) --- modules/artic/minion/main.nf | 5 +++-- modules/artic/minion/meta.yml | 11 ++++++++--- modules/ivar/variants/main.nf | 1 + modules/ivar/variants/meta.yml | 4 ++++ tests/modules/artic/minion/main.nf | 5 ++--- tests/modules/ivar/variants/main.nf | 15 +++++++++------ 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/modules/artic/minion/main.nf b/modules/artic/minion/main.nf index af74b132..22a6fd87 100644 --- a/modules/artic/minion/main.nf +++ b/modules/artic/minion/main.nf @@ -13,7 +13,8 @@ process ARTIC_MINION { path sequencing_summary path ("primer-schemes/${scheme}/V${scheme_version}/${scheme}.reference.fasta") path ("primer-schemes/${scheme}/V${scheme_version}/${scheme}.scheme.bed") - path medaka_model + path medaka_model_file + val medaka_model_string val scheme val scheme_version @@ -44,7 +45,7 @@ process ARTIC_MINION { if (args.tokenize().contains('--medaka')) { fast5 = "" summary = "" - model = file(medaka_model).exists() ? "--medaka-model ./$medaka_model" : "--medaka-model $medaka_model" + model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string" } def hd5_plugin_path = task.ext.hd5_plugin_path ? 
"export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin" """ diff --git a/modules/artic/minion/meta.yml b/modules/artic/minion/meta.yml index 5ef55673..c0f97a0c 100644 --- a/modules/artic/minion/meta.yml +++ b/modules/artic/minion/meta.yml @@ -42,11 +42,16 @@ input: bed files containing coordinates of each primer in the scheme, relative to the reference genome pattern: "*.{scheme.bed}" - - medaka_model: + - medaka_model_file: type: file description: | - Medaka model to use (if option --medaka is set) - pattern: "*.*" + Medaka model file to use (if option --medaka is set) + pattern: "*.hdf5" + - medaka_model_string: + type: value + description: | + Medaka model string to use (if option --medaka is set) + pattern: "*" - scheme: type: value description: Name of the primer scheme diff --git a/modules/ivar/variants/main.nf b/modules/ivar/variants/main.nf index 85c4cacd..b86042f0 100644 --- a/modules/ivar/variants/main.nf +++ b/modules/ivar/variants/main.nf @@ -10,6 +10,7 @@ process IVAR_VARIANTS { input: tuple val(meta), path(bam) path fasta + path fai path gff val save_mpileup diff --git a/modules/ivar/variants/meta.yml b/modules/ivar/variants/meta.yml index 6677ffb2..4d09e449 100644 --- a/modules/ivar/variants/meta.yml +++ b/modules/ivar/variants/meta.yml @@ -25,6 +25,10 @@ input: type: file description: The reference sequence used for mapping and generating the BAM file pattern: "*.fa" + - fai: + type: file + description: The index for the reference sequence used for mapping and generating the BAM file + pattern: "*.fai" - gff: type: file description: A GFF file in the GFF3 format can be supplied to specify coordinates of open reading frames (ORFs). In absence of GFF file, amino acid translation will not be done. 
diff --git a/tests/modules/artic/minion/main.nf b/tests/modules/artic/minion/main.nf index ca66ede0..3bda2ffc 100644 --- a/tests/modules/artic/minion/main.nf +++ b/tests/modules/artic/minion/main.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl = 2 -include { UNTAR } from '../../../../modules/untar/main.nf' +include { UNTAR } from '../../../../modules/untar/main.nf' include { ARTIC_MINION } from '../../../../modules/artic/minion/main.nf' workflow test_artic_minion { @@ -15,9 +15,8 @@ workflow test_artic_minion { sequencing_summary = file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) fasta = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.reference.fasta', checkIfExists: true) bed = file('https://github.com/artic-network/primer-schemes/raw/master/nCoV-2019/V3/nCoV-2019.primer.bed', checkIfExists: true) - dummy_file = [] fast5_dir = UNTAR ( fast5_tar ).untar - ARTIC_MINION ( input, fast5_dir, sequencing_summary, fasta, bed, dummy_file, 'nCoV-2019', '3') + ARTIC_MINION ( input, fast5_dir, sequencing_summary, fasta, bed, [], '', 'nCoV-2019', '3') } diff --git a/tests/modules/ivar/variants/main.nf b/tests/modules/ivar/variants/main.nf index 96835c60..2d8e77c5 100644 --- a/tests/modules/ivar/variants/main.nf +++ b/tests/modules/ivar/variants/main.nf @@ -11,10 +11,11 @@ workflow test_ivar_variants_no_gff_no_mpileup { file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - gff = [] + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + gff = [] save_mpileup = false - IVAR_VARIANTS ( input, fasta, gff, save_mpileup ) + IVAR_VARIANTS ( input, fasta, fai, gff, save_mpileup ) } workflow test_ivar_variants_no_gff_with_mpileup { @@ -24,10 +25,11 @@ workflow test_ivar_variants_no_gff_with_mpileup { 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - gff = [] + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + gff = [] save_mpileup = true - IVAR_VARIANTS ( input, fasta, gff, save_mpileup ) + IVAR_VARIANTS ( input, fasta, fai, gff, save_mpileup ) } workflow test_ivar_variants_with_gff_with_mpileup { @@ -37,8 +39,9 @@ workflow test_ivar_variants_with_gff_with_mpileup { file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + gff = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) save_mpileup = true - IVAR_VARIANTS ( input, fasta, gff, save_mpileup ) + IVAR_VARIANTS ( input, fasta, fai, gff, save_mpileup ) } From 8055c5d1c3920ae1f579a428bdd181f7beb2d302 Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Fri, 18 Feb 2022 17:23:44 -0700 Subject: [PATCH 03/15] add module for rgi (#1321) * add module for rgi * fix extension * fix test yaml * Update main.nf * Update main.nf --- modules/rgi/main/main.nf | 37 ++++++++++++++++++++ modules/rgi/main/meta.yml | 47 ++++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 +++ tests/modules/rgi/main/main.nf | 15 ++++++++ tests/modules/rgi/main/nextflow.config | 5 +++ tests/modules/rgi/main/test.yml | 12 +++++++ 6 files changed, 120 insertions(+) create mode 100644 modules/rgi/main/main.nf create mode 100644 modules/rgi/main/meta.yml create mode 100644 tests/modules/rgi/main/main.nf create mode 100644 tests/modules/rgi/main/nextflow.config create mode 100644 tests/modules/rgi/main/test.yml diff --git a/modules/rgi/main/main.nf b/modules/rgi/main/main.nf new file mode 100644 index 00000000..bf00d333 --- /dev/null +++ b/modules/rgi/main/main.nf @@ -0,0 +1,37 @@ +process RGI_MAIN { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::rgi=5.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/rgi:5.2.1--pyha8f3691_2': + 'quay.io/biocontainers/rgi:5.2.1--pyha8f3691_2' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.txt") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + rgi \\ + main \\ + $args \\ + --num_threads $task.cpus \\ + --output_file $prefix \\ + --input_sequence $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(rgi main --version) + END_VERSIONS + """ +} diff --git a/modules/rgi/main/meta.yml b/modules/rgi/main/meta.yml new file mode 100644 index 00000000..cd97ff92 --- /dev/null +++ b/modules/rgi/main/meta.yml @@ -0,0 +1,47 @@ +name: rgi_main +description: Predict antibiotic resistance from protein or nucleotide data +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - rgi: + description: This tool provides a preliminary annotation of your DNA sequence(s) based upon the data available in The Comprehensive Antibiotic Resistance Database (CARD). Hits to genes tagged with Antibiotic Resistance ontology terms will be highlighted. As CARD expands to include more pathogens, genomes, plasmids, and ontology terms this tool will grow increasingly powerful in providing first-pass detection of antibiotic resistance associated genes. See license at CARD website + homepage: https://card.mcmaster.ca + documentation: https://github.com/arpcard/rgi + tool_dev_url: https://github.com/arpcard/rgi + doi: "10.1093/nar/gkz935" + licence: ['https://card.mcmaster.ca/about'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleotide or protein sequences in FASTA format + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - json: + type: file + description: JSON formatted file with RGI results + pattern: "*.{json}" + - tsv: + type: file + description: Tab-delimited file with RGI results + pattern: "*.{txt}" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 8ed68dca..9aa4c754 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1261,6 +1261,10 @@ raxmlng: - modules/raxmlng/** - tests/modules/raxmlng/** +rgi/main: + - modules/rgi/main/** + - tests/modules/rgi/main/** + rmarkdownnotebook: - modules/rmarkdownnotebook/** - tests/modules/rmarkdownnotebook/** diff --git a/tests/modules/rgi/main/main.nf b/tests/modules/rgi/main/main.nf new file mode 100644 index 00000000..9182a154 --- /dev/null +++ b/tests/modules/rgi/main/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RGI_MAIN } from '../../../../modules/rgi/main/main.nf' + +workflow test_rgi_main { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + RGI_MAIN ( input ) +} diff --git a/tests/modules/rgi/main/nextflow.config b/tests/modules/rgi/main/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/rgi/main/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/rgi/main/test.yml 
b/tests/modules/rgi/main/test.yml new file mode 100644 index 00000000..65d4ad9f --- /dev/null +++ b/tests/modules/rgi/main/test.yml @@ -0,0 +1,12 @@ +- name: rgi main + command: nextflow run ./tests/modules/rgi/main -entry test_rgi_main -c ./tests/config/nextflow.config -c ./tests/modules/rgi/main/nextflow.config + tags: + - rgi + - rgi/main + files: + - path: output/rgi/test.json + contains: ["NZ_LS483480", "orf_end", "perc_identity", "Pulvomycin"] + - path: output/rgi/test.txt + contains: ["NZ_LS483480", "ORF_ID", "Model_type", "Pulvomycin"] + - path: output/rgi/versions.yml + md5sum: 614a45d9d59680d4e743498773cf830a From 439763bf2c19b9836cc4d600747bf765170bada1 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 18 Feb 2022 17:33:48 -0700 Subject: [PATCH 04/15] add module for sistr (#1323) * add module for sistr * Update test.yml --- modules/sistr/main.nf | 49 +++++++++++++++++++++++++ modules/sistr/meta.yml | 55 +++++++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 +++ tests/modules/sistr/main.nf | 15 ++++++++ tests/modules/sistr/nextflow.config | 5 +++ tests/modules/sistr/test.yml | 15 ++++++++ 6 files changed, 143 insertions(+) create mode 100644 modules/sistr/main.nf create mode 100644 modules/sistr/meta.yml create mode 100644 tests/modules/sistr/main.nf create mode 100644 tests/modules/sistr/nextflow.config create mode 100644 tests/modules/sistr/test.yml diff --git a/modules/sistr/main.nf b/modules/sistr/main.nf new file mode 100644 index 00000000..0301d053 --- /dev/null +++ b/modules/sistr/main.nf @@ -0,0 +1,49 @@ +process SISTR { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::sistr_cmd=1.1.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2': + 'quay.io/biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.tab") , emit: tsv + tuple val(meta), path("*-allele.fasta"), emit: allele_fasta + tuple val(meta), path("*-allele.json") , emit: allele_json + tuple val(meta), path("*-cgmlst.csv") , emit: cgmlst_csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + def fasta_name = fasta.getName().replace(".gz", "") + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + sistr \\ + --qc \\ + $args \\ + --threads $task.cpus \\ + --alleles-output ${prefix}-allele.json \\ + --novel-alleles ${prefix}-allele.fasta \\ + --cgmlst-profiles ${prefix}-cgmlst.csv \\ + --output-prediction ${prefix} \\ + --output-format tab \\ + $fasta_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sistr: \$(echo \$(sistr --version 2>&1) | sed 's/^.*sistr_cmd //; s/ .*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/sistr/meta.yml b/modules/sistr/meta.yml new file mode 100644 index 00000000..5ce43334 --- /dev/null +++ b/modules/sistr/meta.yml @@ -0,0 +1,55 @@ +name: sistr +description: Serovar prediction of salmonella assemblies +keywords: + - bacteria + - fasta + - salmonella +tools: + - sistr: + description: Salmonella In Silico Typing Resource (SISTR) commandline tool for serovar prediction + homepage: https://github.com/phac-nml/sistr_cmd + documentation: https://github.com/phac-nml/sistr_cmd + tool_dev_url: https://github.com/phac-nml/sistr_cmd + doi: "10.1371/journal.pone.0147101" + licence: ['Apache-2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+ [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleotide or protein sequences in FASTA format + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: SISTR serovar prediction + pattern: "*.{tab}" + - allele_json: + type: file + description: Allele sequences and info to JSON + pattern: "*.{json}" + - allele_fasta: + type: file + description: FASTA file destination of novel cgMLST alleles + pattern: "*.{fasta}" + - cgmlst_csv: + type: file + description: CSV file destination for cgMLST allelic profiles + pattern: "*.{csv}" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 9aa4c754..5ee2851f 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1429,6 +1429,10 @@ shovill: - modules/shovill/** - tests/modules/shovill/** +sistr: + - modules/sistr/** + - tests/modules/sistr/** + snpdists: - modules/snpdists/** - tests/modules/snpdists/** diff --git a/tests/modules/sistr/main.nf b/tests/modules/sistr/main.nf new file mode 100644 index 00000000..4bd84844 --- /dev/null +++ b/tests/modules/sistr/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SISTR } from '../../../modules/sistr/main.nf' + +workflow test_sistr { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + SISTR ( input ) +} diff --git a/tests/modules/sistr/nextflow.config b/tests/modules/sistr/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/sistr/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/sistr/test.yml b/tests/modules/sistr/test.yml new file mode 100644 index 00000000..88182f28 --- /dev/null +++ b/tests/modules/sistr/test.yml @@ -0,0 +1,15 @@ +- name: sistr test_sistr + command: nextflow run tests/modules/sistr -entry test_sistr -c tests/config/nextflow.config + tags: + - sistr + files: + - path: output/sistr/test-allele.fasta + md5sum: 144a74999eb9dd01520be5c61e8bd210 + - path: output/sistr/test-allele.json + md5sum: 3eb993c9489904621f539a93ff9a90ec + - path: output/sistr/test-cgmlst.csv + md5sum: c50a2144955fe1b98a6d5792bf295088 + - path: output/sistr/test.tab + contains: ["cgmlst_ST", "serovar", "matched"] + - path: output/sistr/versions.yml + md5sum: 8b852f002c3ce67e3f6498da15b28296 From 425939a108ac2674e4ac04327195285331a59d60 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 18 Feb 2022 17:42:18 -0700 Subject: [PATCH 05/15] add module for legsta (#1319) --- modules/legsta/main.nf | 33 ++++++++++++++++++++++ modules/legsta/meta.yml | 42 ++++++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 +++ tests/modules/legsta/main.nf | 15 ++++++++++ tests/modules/legsta/nextflow.config | 5 ++++ tests/modules/legsta/test.yml | 9 ++++++ 6 files changed, 108 insertions(+) create mode 100644 modules/legsta/main.nf create mode 100644 modules/legsta/meta.yml create mode 100644 tests/modules/legsta/main.nf create mode 100644 tests/modules/legsta/nextflow.config create mode 100644 tests/modules/legsta/test.yml diff --git a/modules/legsta/main.nf b/modules/legsta/main.nf new file mode 100644 index 00000000..8e2a3ae3 --- /dev/null +++ b/modules/legsta/main.nf @@ -0,0 +1,33 @@ +process LEGSTA { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 
"bioconda::legsta=0.5.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/legsta%3A0.5.1--hdfd78af_2': + 'quay.io/biocontainers/legsta:0.5.1--hdfd78af_2' }" + + input: + tuple val(meta), path(seqs) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + legsta \\ + $args \\ + $seqs > ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + legsta: \$(echo \$(legsta --version 2>&1) | sed 's/^.*legsta //; s/ .*\$//;') + END_VERSIONS + """ +} diff --git a/modules/legsta/meta.yml b/modules/legsta/meta.yml new file mode 100644 index 00000000..24013c67 --- /dev/null +++ b/modules/legsta/meta.yml @@ -0,0 +1,42 @@ +name: legsta +description: Typing of clinical and environmental isolates of Legionella pneumophila +keywords: + - bacteria + - legionella +tools: + - legsta: + description: In silico Legionella pneumophila Sequence Based Typing + homepage: https://github.com/tseemann/legsta + documentation: https://github.com/tseemann/legsta + tool_dev_url: https://github.com/tseemann/legsta + doi: "" + licence: ['GPL v3'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - seqs: + type: file + description: FASTA, GenBank or EMBL formatted files + pattern: "*.{fasta,gbk,embl}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: Tab-delimited summary of the results + pattern: "*.{tsv}" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 5ee2851f..8cc03dd3 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -892,6 +892,10 @@ leehom: - modules/leehom/** - tests/modules/leehom/** +legsta: + - modules/legsta/** + - tests/modules/legsta/** + lima: - modules/lima/** - tests/modules/lima/** diff --git a/tests/modules/legsta/main.nf b/tests/modules/legsta/main.nf new file mode 100644 index 00000000..606d8209 --- /dev/null +++ b/tests/modules/legsta/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { LEGSTA } from '../../../modules/legsta/main.nf' + +workflow test_legsta { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + LEGSTA ( input ) +} diff --git a/tests/modules/legsta/nextflow.config b/tests/modules/legsta/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/legsta/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/legsta/test.yml b/tests/modules/legsta/test.yml new file mode 100644 index 00000000..8ac28a7e --- /dev/null +++ b/tests/modules/legsta/test.yml @@ -0,0 +1,9 @@ +- name: legsta test_legsta + command: nextflow run tests/modules/legsta -entry test_legsta -c tests/config/nextflow.config + tags: + - legsta + files: + - path: output/legsta/test.tsv + md5sum: c493bdd19335de4828aa8b4e3ce7e1f8 + - path: output/legsta/versions.yml + md5sum: d16c5f6fd68d2bcc2c71954e3342aabe From 
45466684e7d933057054c28c160d27628852390c Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 18 Feb 2022 17:52:38 -0700 Subject: [PATCH 06/15] add mobsuite_recon module (#1270) * add mobsuite_recon module * Update main.nf * Update nextflow.config * Update test.yml Co-authored-by: Sateesh <33637490+sateeshperi@users.noreply.github.com> --- modules/mobsuite/recon/main.nf | 45 ++++++++++++++++ modules/mobsuite/recon/meta.yml | 54 ++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/mobsuite/recon/main.nf | 13 +++++ tests/modules/mobsuite/recon/nextflow.config | 5 ++ tests/modules/mobsuite/recon/test.yml | 12 +++++ 6 files changed, 133 insertions(+) create mode 100644 modules/mobsuite/recon/main.nf create mode 100644 modules/mobsuite/recon/meta.yml create mode 100644 tests/modules/mobsuite/recon/main.nf create mode 100644 tests/modules/mobsuite/recon/nextflow.config create mode 100644 tests/modules/mobsuite/recon/test.yml diff --git a/modules/mobsuite/recon/main.nf b/modules/mobsuite/recon/main.nf new file mode 100644 index 00000000..9ca7e180 --- /dev/null +++ b/modules/mobsuite/recon/main.nf @@ -0,0 +1,45 @@ +process MOBSUITE_RECON { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::mob_suite=3.0.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mob_suite%3A3.0.3--pyhdfd78af_0': + 'quay.io/biocontainers/mob_suite:3.0.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("results/chromosome.fasta") , emit: chromosome + tuple val(meta), path("results/contig_report.txt") , emit: contig_report + tuple val(meta), path("results/plasmid_*.fasta") , emit: plasmids , optional: true + tuple val(meta), path("results/mobtyper_results.txt"), emit: mobtyper_results, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + def fasta_name = fasta.getName().replace(".gz", "") + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + mob_recon \\ + --infile $fasta_name \\ + $args \\ + --num_threads $task.cpus \\ + --outdir results \\ + --sample_id $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mobsuite: \$(echo \$(mob_recon --version 2>&1) | sed 's/^.*mob_recon //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mobsuite/recon/meta.yml b/modules/mobsuite/recon/meta.yml new file mode 100644 index 00000000..b5232142 --- /dev/null +++ b/modules/mobsuite/recon/meta.yml @@ -0,0 +1,54 @@ +name: mobsuite_recon +description: A tool to reconstruct plasmids in bacterial assemblies +keywords: + - bacteria + - plasmid +tools: + - mobsuite: + description: Software tools for clustering, reconstruction and typing of plasmids from draft assemblies. + homepage: https://github.com/phac-nml/mob-suite + documentation: https://github.com/phac-nml/mob-suite + tool_dev_url: https://github.com/phac-nml/mob-suite + doi: "10.1099/mgen.0.000435" + licence: ['Apache License, Version 2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+ [ id:'test', single_end:false ] + - fasta: + type: file + description: A bacterial genome assembly in FASTA format + pattern: "*.{fasta,fa,fna}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - chromosome: + type: file + description: FASTA file of all contigs found to belong to the chromosome + pattern: "chromosome.fasta" + - contig_report: + type: file + description: Assignment of the contig to chromosome or a particular plasmid grouping + pattern: "contig_report.txt" + - plasmids: + type: file + description: Each plasmid group is written to an individual FASTA + pattern: "plasmid_*.fasta" + - mobtyper_results: + type: file + description: Aggregate MOB-typer report files for all identified plasmids + pattern: "mobtyper_results.txt" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 8cc03dd3..a053a57c 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1024,6 +1024,10 @@ mlst: - modules/mlst/** - tests/modules/mlst/** +mobsuite/recon: + - modules/mobsuite/recon/** + - tests/modules/mobsuite/recon/** + mosdepth: - modules/mosdepth/** - tests/modules/mosdepth/** diff --git a/tests/modules/mobsuite/recon/main.nf b/tests/modules/mobsuite/recon/main.nf new file mode 100644 index 00000000..0d18ef9c --- /dev/null +++ b/tests/modules/mobsuite/recon/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { MOBSUITE_RECON } from '../../../../modules/mobsuite/recon/main.nf' + +workflow test_mobsuite_recon { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + MOBSUITE_RECON ( input ) +} diff --git a/tests/modules/mobsuite/recon/nextflow.config 
b/tests/modules/mobsuite/recon/nextflow.config new file mode 100644 index 00000000..06a716aa --- /dev/null +++ b/tests/modules/mobsuite/recon/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mobsuite/recon/test.yml b/tests/modules/mobsuite/recon/test.yml new file mode 100644 index 00000000..dfff835b --- /dev/null +++ b/tests/modules/mobsuite/recon/test.yml @@ -0,0 +1,12 @@ +- name: mobsuite recon test_mobsuite_recon + command: nextflow run tests/modules/mobsuite/recon -entry test_mobsuite_recon -c tests/config/nextflow.config + tags: + - mobsuite/recon + - mobsuite + files: + - path: output/mobsuite/results/chromosome.fasta + md5sum: 33b2a0fa321c73c6ba8d8272dd53c6d4 + - path: output/mobsuite/results/contig_report.txt + md5sum: a0ae364a9f2b475f77588d0b3c24b857 + - path: output/mobsuite/versions.yml + md5sum: 7f7a0f8957394b0e526233a0edb8e20a From be798861c616e445abbaf9f2549deb10055c2ed1 Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Fri, 18 Feb 2022 18:00:21 -0700 Subject: [PATCH 07/15] add module for abricate (#1280) * add module for abricate * rename abricate/abricate to abricate/run * Update test.yml Co-authored-by: Sateesh <33637490+sateeshperi@users.noreply.github.com> --- modules/abricate/run/main.nf | 34 +++++++++++++++ modules/abricate/run/meta.yml | 43 +++++++++++++++++++ modules/abricate/summary/main.nf | 33 ++++++++++++++ modules/abricate/summary/meta.yml | 43 +++++++++++++++++++ tests/config/pytest_modules.yml | 8 ++++ tests/modules/abricate/run/main.nf | 15 +++++++ tests/modules/abricate/run/nextflow.config | 5 +++ tests/modules/abricate/run/test.yml | 10 +++++ tests/modules/abricate/summary/main.nf | 21 +++++++++ .../modules/abricate/summary/nextflow.config | 5 +++ tests/modules/abricate/summary/test.yml | 14 ++++++ 11 files changed, 231 insertions(+) create mode 100644 modules/abricate/run/main.nf create mode 100644 modules/abricate/run/meta.yml create mode 100644 modules/abricate/summary/main.nf create mode 100644 modules/abricate/summary/meta.yml create mode 100644 tests/modules/abricate/run/main.nf create mode 100644 tests/modules/abricate/run/nextflow.config create mode 100644 tests/modules/abricate/run/test.yml create mode 100644 tests/modules/abricate/summary/main.nf create mode 100644 tests/modules/abricate/summary/nextflow.config create mode 100644 tests/modules/abricate/summary/test.yml diff --git a/modules/abricate/run/main.nf b/modules/abricate/run/main.nf new file mode 100644 index 00000000..11992791 --- /dev/null +++ b/modules/abricate/run/main.nf @@ -0,0 +1,34 @@ +process ABRICATE_RUN { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::abricate=1.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': + 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" + + input: + tuple val(meta), path(assembly) + + output: + tuple val(meta), path("*.txt"), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + abricate \\ + $assembly \\ + $args \\ + --threads $task.cpus > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + END_VERSIONS + """ +} diff --git a/modules/abricate/run/meta.yml b/modules/abricate/run/meta.yml new file mode 100644 index 00000000..2464d03e --- /dev/null +++ b/modules/abricate/run/meta.yml @@ -0,0 +1,43 @@ +name: abricate_run +description: Screen assemblies for antimicrobial resistance against multiple databases +keywords: + - bacteria + - assembly + - antimicrobial resistance +tools: + - abricate: + description: Mass screening of contigs for antibiotic resistance genes + homepage: https://github.com/tseemann/abricate + documentation: https://github.com/tseemann/abricate + tool_dev_url: https://github.com/tseemann/abricate + doi: "" + licence: ['GPL v2', 'GPL v2'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - report: + type: file + description: Tab-delimited report of results + pattern: "*.{txt}" + +authors: + - "@rpetit3" diff --git a/modules/abricate/summary/main.nf b/modules/abricate/summary/main.nf new file mode 100644 index 00000000..b3ba4d58 --- /dev/null +++ b/modules/abricate/summary/main.nf @@ -0,0 +1,33 @@ +process ABRICATE_SUMMARY { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::abricate=1.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': + 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" + + input: + tuple val(meta), path(reports) + + output: + tuple val(meta), path("*.txt"), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + abricate \\ + --summary \\ + $reports > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + END_VERSIONS + """ +} diff --git a/modules/abricate/summary/meta.yml b/modules/abricate/summary/meta.yml new file mode 100644 index 00000000..b02ba930 --- /dev/null +++ b/modules/abricate/summary/meta.yml @@ -0,0 +1,43 @@ +name: abricate_summary +description: Screen assemblies for antimicrobial resistance against multiple databases +keywords: + - bacteria + - assembly + - antimicrobial resistance +tools: + - abricate: + description: Mass screening of contigs for antibiotic resistance genes + homepage: https://github.com/tseemann/abricate + documentation: https://github.com/tseemann/abricate + tool_dev_url: https://github.com/tseemann/abricate + doi: "" + licence: ['GPL v2', 
'GPL v2'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: file + description: FASTA, GenBank or EMBL formatted file + pattern: "*.{fa,fasta,fna,fa.gz,fasta.gz,fna.gz,gbk,gbk.gz,embl,embl.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - summary: + type: file + description: Tab-delimited report of aggregated results + pattern: "*.{txt}" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index a053a57c..8752765f 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2,6 +2,14 @@ abacas: - modules/abacas/** - tests/modules/abacas/** +abricate/run: + - modules/abricate/run/** + - tests/modules/abricate/run/** + +abricate/summary: + - modules/abricate/summary/** + - tests/modules/abricate/summary/** + adapterremoval: - modules/adapterremoval/** - tests/modules/adapterremoval/** diff --git a/tests/modules/abricate/run/main.nf b/tests/modules/abricate/run/main.nf new file mode 100644 index 00000000..19d1f8a8 --- /dev/null +++ b/tests/modules/abricate/run/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { ABRICATE_RUN } from '../../../../modules/abricate/run/main.nf' + +workflow test_abricate_run { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + ABRICATE_RUN ( input ) +} diff --git a/tests/modules/abricate/run/nextflow.config b/tests/modules/abricate/run/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/abricate/run/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/abricate/run/test.yml b/tests/modules/abricate/run/test.yml new file mode 100644 index 00000000..3377623c --- /dev/null +++ b/tests/modules/abricate/run/test.yml @@ -0,0 +1,10 @@ +- name: abricate run + command: nextflow run tests/modules/abricate/run -entry test_abricate_run -c tests/config/nextflow.config + tags: + - abricate + - abricate/run + files: + - path: output/abricate/test.txt + md5sum: cd07e2953b127aed8d09bf1b2b903a1f + - path: output/abricate/versions.yml + md5sum: ae9cafaae96a644bb852e337aa7251f3 diff --git a/tests/modules/abricate/summary/main.nf b/tests/modules/abricate/summary/main.nf new file mode 100644 index 00000000..9acbdba9 --- /dev/null +++ b/tests/modules/abricate/summary/main.nf @@ -0,0 +1,21 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { ABRICATE_RUN } from '../../../../modules/abricate/run/main.nf' +include { ABRICATE_SUMMARY } from '../../../../modules/abricate/summary/main.nf' + +workflow test_abricate_summary { + + inputs = [ + tuple([ id:'test1', single_end:false ], // meta map + file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)), + tuple([ id:'test2', single_end:false ], + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true)) + ] + + ABRICATE_RUN ( Channel.fromList(inputs) ) + ABRICATE_SUMMARY ( + ABRICATE_RUN.out.report.collect{ meta, report -> report }.map{ report -> [[ id: 'test_summary'], report]} + ) +} diff --git a/tests/modules/abricate/summary/nextflow.config b/tests/modules/abricate/summary/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/abricate/summary/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end 
of file diff --git a/tests/modules/abricate/summary/test.yml b/tests/modules/abricate/summary/test.yml new file mode 100644 index 00000000..abbd747b --- /dev/null +++ b/tests/modules/abricate/summary/test.yml @@ -0,0 +1,14 @@ +- name: abricate summary + command: nextflow run tests/modules/abricate/summary -entry test_abricate_summary -c tests/config/nextflow.config + tags: + - abricate + - abricate/summary + files: + - path: output/abricate/test1.txt + md5sum: cd07e2953b127aed8d09bf1b2b903a1f + - path: output/abricate/test2.txt + md5sum: 69af3321b0bc808b7ef85f102395736f + - path: output/abricate/test_summary.txt + md5sum: a4ec7010e75404ce3a1033f0c4b4a7f9 + - path: output/abricate/versions.yml + md5sum: a18f0471c49e5f25ec0b0c4ad5fab08e From f655e5dea2e25f403e23fb6dfdc33cc538666769 Mon Sep 17 00:00:00 2001 From: Sateesh <33637490+sateeshperi@users.noreply.github.com> Date: Fri, 18 Feb 2022 20:18:05 -0500 Subject: [PATCH 08/15] Picard fixmateinformation (#1315) * add picard-fixmateinformation * add picard-fixmateinformation * fix trailing whitespace * fix trailing whitespace Co-authored-by: Peri Co-authored-by: Robert A. 
Petit III --- modules/picard/fixmateinformation/main.nf | 43 ++++++++++++++++++ modules/picard/fixmateinformation/meta.yml | 44 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../modules/picard/fixmateinformation/main.nf | 15 +++++++ .../picard/fixmateinformation/nextflow.config | 5 +++ .../picard/fixmateinformation/test.yml | 10 +++++ 6 files changed, 121 insertions(+) create mode 100644 modules/picard/fixmateinformation/main.nf create mode 100644 modules/picard/fixmateinformation/meta.yml create mode 100644 tests/modules/picard/fixmateinformation/main.nf create mode 100644 tests/modules/picard/fixmateinformation/nextflow.config create mode 100644 tests/modules/picard/fixmateinformation/test.yml diff --git a/modules/picard/fixmateinformation/main.nf b/modules/picard/fixmateinformation/main.nf new file mode 100644 index 00000000..763f3bb4 --- /dev/null +++ b/modules/picard/fixmateinformation/main.nf @@ -0,0 +1,43 @@ +process PICARD_FIXMATEINFORMATION { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::picard=2.26.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.26.9--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def STRINGENCY = task.ext.stringency ?: "STRICT" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard FixMateInformation] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + picard \\ + FixMateInformation \\ + -Xmx${avail_mem}g \\ + -I ${bam} \\ + -O ${prefix}.bam \\ + --VALIDATION_STRINGENCY ${STRINGENCY} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard FixMateInformation --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/picard/fixmateinformation/meta.yml b/modules/picard/fixmateinformation/meta.yml new file mode 100644 index 00000000..c01d803c --- /dev/null +++ b/modules/picard/fixmateinformation/meta.yml @@ -0,0 +1,44 @@ +name: picard_fixmateinformation +description: Verify mate-pair information between mates and fix if needed +keywords: + - mate-pair +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036713471-FixMateInformation-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: mate-pair verified BAM file + pattern: "*.{bam}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 8752765f..99e8f2b5 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1173,6 +1173,10 @@ picard/filtersamreads: - modules/picard/filtersamreads/** - tests/modules/picard/filtersamreads/** +picard/fixmateinformation: + - modules/picard/fixmateinformation/** + - tests/modules/picard/fixmateinformation/** + picard/markduplicates: - modules/picard/markduplicates/** - tests/modules/picard/markduplicates/** diff --git a/tests/modules/picard/fixmateinformation/main.nf b/tests/modules/picard/fixmateinformation/main.nf new file mode 100644 index 00000000..46b4248b --- /dev/null +++ b/tests/modules/picard/fixmateinformation/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_FIXMATEINFORMATION } from '../../../../modules/picard/fixmateinformation/main.nf' + +workflow test_picard_fixmateinformation { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + PICARD_FIXMATEINFORMATION ( input ) +} diff --git a/tests/modules/picard/fixmateinformation/nextflow.config b/tests/modules/picard/fixmateinformation/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/picard/fixmateinformation/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/picard/fixmateinformation/test.yml b/tests/modules/picard/fixmateinformation/test.yml new file mode 
100644 index 00000000..f12f823b --- /dev/null +++ b/tests/modules/picard/fixmateinformation/test.yml @@ -0,0 +1,10 @@ +- name: picard fixmateinformation test_picard_fixmateinformation + command: nextflow run tests/modules/picard/fixmateinformation -entry test_picard_fixmateinformation -c tests/config/nextflow.config + tags: + - picard + - picard/fixmateinformation + files: + - path: output/picard/test.bam + md5sum: 746102e8c242c0ef42e045c49d320030 + - path: output/picard/versions.yml + md5sum: 4329ba7cdca8f4f6018dfd5c019ba2eb From 62e5d1f0b38367d628e6b4c32b45d462f2c0b325 Mon Sep 17 00:00:00 2001 From: Sateesh <33637490+sateeshperi@users.noreply.github.com> Date: Fri, 18 Feb 2022 20:49:12 -0500 Subject: [PATCH 09/15] Picard createsequencedictionary (#1310) * add picard/createsequencedictionary module * add picard-CreateSequenceDictionary * add picard/createsequencedictionary/ * add contains to test yml * update test yml contains * update test yml contains Co-authored-by: Peri Co-authored-by: Robert A. 
Petit III --- .../picard/createsequencedictionary/main.nf | 42 +++++++++++++++++ .../picard/createsequencedictionary/meta.yml | 45 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../picard/createsequencedictionary/main.nf | 15 +++++++ .../createsequencedictionary/nextflow.config | 5 +++ .../picard/createsequencedictionary/test.yml | 10 +++++ 6 files changed, 121 insertions(+) create mode 100644 modules/picard/createsequencedictionary/main.nf create mode 100644 modules/picard/createsequencedictionary/meta.yml create mode 100644 tests/modules/picard/createsequencedictionary/main.nf create mode 100644 tests/modules/picard/createsequencedictionary/nextflow.config create mode 100644 tests/modules/picard/createsequencedictionary/test.yml diff --git a/modules/picard/createsequencedictionary/main.nf b/modules/picard/createsequencedictionary/main.nf new file mode 100644 index 00000000..96069e9f --- /dev/null +++ b/modules/picard/createsequencedictionary/main.nf @@ -0,0 +1,42 @@ +process PICARD_CREATESEQUENCEDICTIONARY { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.26.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.26.9--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dict"), emit: reference_dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CreateSequenceDictionary \\ + $args \\ + R=$fasta \\ + O=${prefix}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/picard/createsequencedictionary/meta.yml b/modules/picard/createsequencedictionary/meta.yml new file mode 100644 index 00000000..f40a4dbc --- /dev/null +++ b/modules/picard/createsequencedictionary/meta.yml @@ -0,0 +1,45 @@ +name: picard_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence. +keywords: + - sequence + - dictionary + - picard +tools: + - picard: + description: | + Creates a sequence dictionary file (with ".dict" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dict: + type: file + description: picard dictionary file + pattern: "*.{dict}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 99e8f2b5..52f8d625 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1169,6 +1169,10 @@ picard/collectwgsmetrics: - modules/picard/collectwgsmetrics/** - tests/modules/picard/collectwgsmetrics/** +picard/createsequencedictionary: + - modules/picard/createsequencedictionary/** + - tests/modules/picard/createsequencedictionary/** + picard/filtersamreads: - modules/picard/filtersamreads/** - tests/modules/picard/filtersamreads/** diff --git a/tests/modules/picard/createsequencedictionary/main.nf b/tests/modules/picard/createsequencedictionary/main.nf new file mode 100644 index 00000000..4dc24c8a --- /dev/null +++ b/tests/modules/picard/createsequencedictionary/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../../modules/picard/createsequencedictionary/main.nf' + +workflow test_picard_createsequencedictionary { + + input = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + PICARD_CREATESEQUENCEDICTIONARY ( input ) +} diff --git a/tests/modules/picard/createsequencedictionary/nextflow.config b/tests/modules/picard/createsequencedictionary/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/picard/createsequencedictionary/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/picard/createsequencedictionary/test.yml 
b/tests/modules/picard/createsequencedictionary/test.yml new file mode 100644 index 00000000..3c9d0e7e --- /dev/null +++ b/tests/modules/picard/createsequencedictionary/test.yml @@ -0,0 +1,10 @@ +- name: picard createsequencedictionary test_picard_createsequencedictionary + command: nextflow run tests/modules/picard/createsequencedictionary -entry test_picard_createsequencedictionary -c tests/config/nextflow.config + tags: + - picard/createsequencedictionary + - picard + files: + - path: output/picard/test.dict + contains: ['SN:MT192765.1'] + - path: output/picard/versions.yml + md5sum: b3d8c7ea65b8a6d3237b153d13fe2014 From 927dbfed048469e945f93236d213011e45672fba Mon Sep 17 00:00:00 2001 From: Sateesh <33637490+sateeshperi@users.noreply.github.com> Date: Fri, 18 Feb 2022 21:17:58 -0500 Subject: [PATCH 10/15] Picard cleansam (#1306) * add picard-cleansam * add picard/cleansam * update test yml with output * picard 2.26.10 -> 2.26.9 * add output to test yml Co-authored-by: Peri Co-authored-by: Robert A. Petit III --- modules/picard/cleansam/main.nf | 44 ++++++++++++++++++ modules/picard/cleansam/meta.yml | 46 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/picard/cleansam/main.nf | 15 ++++++ tests/modules/picard/cleansam/nextflow.config | 5 ++ tests/modules/picard/cleansam/test.yml | 10 ++++ 6 files changed, 124 insertions(+) create mode 100644 modules/picard/cleansam/main.nf create mode 100644 modules/picard/cleansam/meta.yml create mode 100644 tests/modules/picard/cleansam/main.nf create mode 100644 tests/modules/picard/cleansam/nextflow.config create mode 100644 tests/modules/picard/cleansam/test.yml diff --git a/modules/picard/cleansam/main.nf b/modules/picard/cleansam/main.nf new file mode 100644 index 00000000..2eb171d5 --- /dev/null +++ b/modules/picard/cleansam/main.nf @@ -0,0 +1,44 @@ +process PICARD_CLEANSAM { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::picard=2.26.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.26.9--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }" + + input: + tuple val(meta), path(sam) + + output: + tuple val(meta), path("*.sam"), emit: sam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def STRINGENCY = task.ext.stringency ?: "STRICT" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CleanSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CleanSam \\ + ${args} \\ + -I ${sam} \\ + -O ${prefix}.sam \\ + --VALIDATION_STRINGENCY ${STRINGENCY} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CleanSam --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/picard/cleansam/meta.yml b/modules/picard/cleansam/meta.yml new file mode 100644 index 00000000..d22e1742 --- /dev/null +++ b/modules/picard/cleansam/meta.yml @@ -0,0 +1,46 @@ +name: picard_cleansam +description: Cleans the provided SAM/BAM, soft-clipping beyond-end-of-reference alignments and setting MAPQ to 0 for unmapped reads +keywords: + - clean + - sam + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. 
+ homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036491452-CleanSam-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sam: + type: file + description: SAM file + pattern: "*.{sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - sam: + type: file + description: Cleaned SAM file + pattern: "*.{sam}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 52f8d625..3d857d2c 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1157,6 +1157,10 @@ phyloflash: - modules/phyloflash/** - tests/modules/phyloflash/** +picard/cleansam: + - modules/picard/cleansam/** + - tests/modules/picard/cleansam/** + picard/collecthsmetrics: - modules/picard/collecthsmetrics/** - tests/modules/picard/collecthsmetrics/** diff --git a/tests/modules/picard/cleansam/main.nf b/tests/modules/picard/cleansam/main.nf new file mode 100644 index 00000000..f189b0f3 --- /dev/null +++ b/tests/modules/picard/cleansam/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_CLEANSAM } from '../../../../modules/picard/cleansam/main.nf' + +workflow test_picard_cleansam { + + input = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + ] + + PICARD_CLEANSAM ( input ) +} diff --git a/tests/modules/picard/cleansam/nextflow.config b/tests/modules/picard/cleansam/nextflow.config new file mode 100644 index 00000000..50f50a7a --- 
/dev/null +++ b/tests/modules/picard/cleansam/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/picard/cleansam/test.yml b/tests/modules/picard/cleansam/test.yml new file mode 100644 index 00000000..716dfe6a --- /dev/null +++ b/tests/modules/picard/cleansam/test.yml @@ -0,0 +1,10 @@ +- name: picard cleansam test_picard_cleansam + command: nextflow run tests/modules/picard/cleansam -entry test_picard_cleansam -c tests/config/nextflow.config + tags: + - picard/cleansam + - picard + files: + - path: output/picard/test.sam + md5sum: e314171a6060eb79947c13ad126ddf00 + - path: output/picard/versions.yml + md5sum: e6457d7c6de51bf6f4b577eda65e57ac From a0d91e4a93daab335e8dbae31b345188c120e0a0 Mon Sep 17 00:00:00 2001 From: Sateesh <33637490+sateeshperi@users.noreply.github.com> Date: Fri, 18 Feb 2022 21:59:06 -0500 Subject: [PATCH 11/15] Picard addorreplacereadgroups (#1305) * add picard-addorreplacereadgroups * add picard_addorreplacereadgroups * add new line to main.nf * remove trailing whitespaces * remove trailing whitespaces * change to output in test yml * add when directive * picard 2.26.10 -> 2.26.9 * picard 2.26.10 -> 2.26.9 test yml Co-authored-by: Peri Co-authored-by: Robert A. 
Petit III --- modules/picard/addorreplacereadgroups/main.nf | 53 +++++++++++++++++++ .../picard/addorreplacereadgroups/meta.yml | 46 ++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../picard/addorreplacereadgroups/main.nf | 15 ++++++ .../addorreplacereadgroups/nextflow.config | 5 ++ .../picard/addorreplacereadgroups/test.yml | 10 ++++ 6 files changed, 133 insertions(+) create mode 100644 modules/picard/addorreplacereadgroups/main.nf create mode 100644 modules/picard/addorreplacereadgroups/meta.yml create mode 100644 tests/modules/picard/addorreplacereadgroups/main.nf create mode 100644 tests/modules/picard/addorreplacereadgroups/nextflow.config create mode 100644 tests/modules/picard/addorreplacereadgroups/test.yml diff --git a/modules/picard/addorreplacereadgroups/main.nf b/modules/picard/addorreplacereadgroups/main.nf new file mode 100644 index 00000000..8e1d10af --- /dev/null +++ b/modules/picard/addorreplacereadgroups/main.nf @@ -0,0 +1,53 @@ +process PICARD_ADDORREPLACEREADGROUPS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::picard=2.26.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/picard:2.26.9--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.9--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ID = task.ext.id ?: "id" + def LIBRARY= task.ext.library ?: "library" + def PLATFORM= task.ext.platform ?: "illumina" + def BARCODE= task.ext.barcode ?: "barcode" + def SAMPLE= task.ext.sample ?: "sample" + def INDEX= task.ext.index ?: "index" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard AddOrReplaceReadGroups] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + AddOrReplaceReadGroups \\ + -Xmx${avail_mem}g \\ + --INPUT ${bam} \\ + --OUTPUT ${prefix}.bam \\ + -ID ${ID} \\ + -LB ${LIBRARY} \\ + -PL ${PLATFORM} \\ + -PU ${BARCODE} \\ + -SM ${SAMPLE} \\ + -CREATE_INDEX true + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard AddOrReplaceReadGroups --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/picard/addorreplacereadgroups/meta.yml b/modules/picard/addorreplacereadgroups/meta.yml new file mode 100644 index 00000000..bdb5725c --- /dev/null +++ b/modules/picard/addorreplacereadgroups/meta.yml @@ -0,0 +1,46 @@ +name: picard_addorreplacereadgroups +description: Assigns all the reads in a file to a single new read-group +keywords: + - add + - replace + - read-group +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. 
+ homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037226472-AddOrReplaceReadGroups-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Output BAM file + pattern: "*.{bam}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3d857d2c..e6773293 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1157,6 +1157,10 @@ phyloflash: - modules/phyloflash/** - tests/modules/phyloflash/** +picard/addorreplacereadgroups: + - modules/picard/addorreplacereadgroups/** + - tests/modules/picard/addorreplacereadgroups/** + picard/cleansam: - modules/picard/cleansam/** - tests/modules/picard/cleansam/** diff --git a/tests/modules/picard/addorreplacereadgroups/main.nf b/tests/modules/picard/addorreplacereadgroups/main.nf new file mode 100644 index 00000000..d5148c36 --- /dev/null +++ b/tests/modules/picard/addorreplacereadgroups/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_ADDORREPLACEREADGROUPS } from '../../../../modules/picard/addorreplacereadgroups/main.nf' + +workflow test_picard_addorreplacereadgroups { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + PICARD_ADDORREPLACEREADGROUPS ( input ) +} diff --git 
a/tests/modules/picard/addorreplacereadgroups/nextflow.config b/tests/modules/picard/addorreplacereadgroups/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/picard/addorreplacereadgroups/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/picard/addorreplacereadgroups/test.yml b/tests/modules/picard/addorreplacereadgroups/test.yml new file mode 100644 index 00000000..aa1536bb --- /dev/null +++ b/tests/modules/picard/addorreplacereadgroups/test.yml @@ -0,0 +1,10 @@ +- name: picard addorreplacereadgroups test_picard_addorreplacereadgroups + command: nextflow run tests/modules/picard/addorreplacereadgroups -entry test_picard_addorreplacereadgroups -c tests/config/nextflow.config + tags: + - picard + - picard/addorreplacereadgroups + files: + - path: output/picard/test.bam + md5sum: 7b82f3461c2d80fc6a10385e78c9427f + - path: output/picard/versions.yml + md5sum: 8a2d176295e1343146ea433c79bb517f From 15d39f841af22581cfa472eafea37810dc6293c1 Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Sat, 19 Feb 2022 16:02:23 -0700 Subject: [PATCH 12/15] add amrfinderplus module (#1284) * add amrfinderplus module * Update test.yml * Update main.nf * Update main.nf * Update test.yml * Update test.yml * Update test.yml * Update main.nf * Update meta.yml * Update meta.yml * Update main.nf * Update test.yml * Update test.yml --- modules/amrfinderplus/run/main.nf | 55 +++++++++++++++++++ modules/amrfinderplus/run/meta.yml | 51 +++++++++++++++++ modules/amrfinderplus/update/main.nf | 29 ++++++++++ modules/amrfinderplus/update/meta.yml | 37 +++++++++++++ tests/config/pytest_modules.yml | 8 +++ tests/modules/amrfinderplus/run/main.nf | 17 ++++++ .../modules/amrfinderplus/run/nextflow.config | 5 ++ tests/modules/amrfinderplus/run/test.yml | 11 ++++ tests/modules/amrfinderplus/update/main.nf | 11 ++++ .../amrfinderplus/update/nextflow.config | 5 ++ tests/modules/amrfinderplus/update/test.yml | 9 +++ 11 files changed, 238 insertions(+) create mode 100644 modules/amrfinderplus/run/main.nf create mode 100644 modules/amrfinderplus/run/meta.yml create mode 100644 modules/amrfinderplus/update/main.nf create mode 100644 modules/amrfinderplus/update/meta.yml create mode 100644 tests/modules/amrfinderplus/run/main.nf create mode 100644 tests/modules/amrfinderplus/run/nextflow.config create mode 100644 tests/modules/amrfinderplus/run/test.yml create mode 100644 tests/modules/amrfinderplus/update/main.nf create mode 100644 tests/modules/amrfinderplus/update/nextflow.config create mode 100644 tests/modules/amrfinderplus/update/test.yml diff --git a/modules/amrfinderplus/run/main.nf b/modules/amrfinderplus/run/main.nf new file mode 100644 index 00000000..47a8fb46 --- /dev/null +++ b/modules/amrfinderplus/run/main.nf @@ -0,0 +1,55 @@ +process AMRFINDERPLUS_RUN { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 
"bioconda::ncbi-amrfinderplus=3.10.23" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus%3A3.10.23--h17dc2d4_0': + 'quay.io/biocontainers/ncbi-amrfinderplus:3.10.23--h17dc2d4_0' }" + + input: + tuple val(meta), path(fasta) + path db + + output: + tuple val(meta), path("${prefix}.tsv") , emit: report + tuple val(meta), path("${prefix}-mutations.tsv"), emit: mutation_report, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + prefix = task.ext.prefix ?: "${meta.id}" + organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-mutations.tsv" : "" + fasta_name = fasta.getName().replace(".gz", "") + fasta_param = "-n" + if (meta.containsKey("is_proteins")) { + if (meta.is_proteins) { + fasta_param = "-p" + } + } + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + mkdir amrfinderdb + tar xzvf $db -C amrfinderdb + + amrfinder \\ + $fasta_param $fasta_name \\ + $organism_param \\ + $args \\ + --database amrfinderdb \\ + --threads $task.cpus > ${prefix}.tsv + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + END_VERSIONS + """ +} diff --git a/modules/amrfinderplus/run/meta.yml b/modules/amrfinderplus/run/meta.yml new file mode 100644 index 00000000..b0f3b8fa --- /dev/null +++ b/modules/amrfinderplus/run/meta.yml @@ -0,0 +1,51 @@ +name: amrfinderplus_run +description: Identify antimicrobial resistance in gene or protein sequences +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - amrfinderplus: + description: AMRFinderPlus finds antimicrobial resistance and other genes in protein or nucleotide sequences. 
+ homepage: https://github.com/ncbi/amr/wiki + documentation: https://github.com/ncbi/amr/wiki + tool_dev_url: https://github.com/ncbi/amr + doi: "10.1038/s41598-021-91456-0" + licence: ['Public Domain'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleotide or protein sequences in FASTA format + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + - db: + type: file + description: A compressed tarball of the AMRFinderPlus database to query + pattern: "*.tar.gz" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - report: + type: file + description: AMRFinder+ final report + pattern: "*.tsv" + - mutation_report: + type: file + description: Report of organism-specific point-mutations + pattern: "*-mutations.tsv" + +authors: + - "@rpetit3" diff --git a/modules/amrfinderplus/update/main.nf b/modules/amrfinderplus/update/main.nf new file mode 100644 index 00000000..ad09c391 --- /dev/null +++ b/modules/amrfinderplus/update/main.nf @@ -0,0 +1,29 @@ +process AMRFINDERPLUS_UPDATE { + tag "update" + label 'process_low' + + conda (params.enable_conda ? "bioconda::ncbi-amrfinderplus=3.10.23" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus%3A3.10.23--h17dc2d4_0': + 'quay.io/biocontainers/ncbi-amrfinderplus:3.10.23--h17dc2d4_0' }" + + output: + path "amrfinderdb.tar.gz", emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir amrfinderdb + amrfinder_update -d amrfinderdb + tar czvf amrfinderdb.tar.gz -C \$(readlink amrfinderdb/latest) ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + END_VERSIONS + """ +} diff --git a/modules/amrfinderplus/update/meta.yml b/modules/amrfinderplus/update/meta.yml new file mode 100644 index 00000000..84fee3df --- /dev/null +++ b/modules/amrfinderplus/update/meta.yml @@ -0,0 +1,37 @@ +name: amrfinderplus_update +description: Fetch the latest AMRFinderPlus database and package it as a compressed tarball +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - amrfinderplus: + description: AMRFinderPlus finds antimicrobial resistance and other genes in protein or nucleotide sequences. + homepage: https://github.com/ncbi/amr/wiki + documentation: https://github.com/ncbi/amr/wiki + tool_dev_url: https://github.com/ncbi/amr + doi: "10.1038/s41598-021-91456-0" + licence: ['Public Domain'] + +input: + - input_not_required: + type: null + description: module does not have an input + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: file + description: The latest AMRFinder+ database in a compressed tarball + pattern: "*.tar.gz" + +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index e6773293..93903a65 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -26,6 +26,14 @@ amps: - modules/amps/** - tests/modules/amps/** +amrfinderplus/run: + - modules/amrfinderplus/run/** + - tests/modules/amrfinderplus/run/** + +amrfinderplus/update: + - modules/amrfinderplus/update/** + - tests/modules/amrfinderplus/update/** + arriba: - modules/arriba/** - tests/modules/arriba/** diff --git a/tests/modules/amrfinderplus/run/main.nf b/tests/modules/amrfinderplus/run/main.nf new file mode 100644 index 00000000..917ac28d --- /dev/null +++ b/tests/modules/amrfinderplus/run/main.nf @@ -0,0 +1,17 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { AMRFINDERPLUS_UPDATE } from '../../../../modules/amrfinderplus/update/main.nf' +include { AMRFINDERPLUS_RUN } from '../../../../modules/amrfinderplus/run/main.nf' + +workflow test_amrfinderplus_run { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + + AMRFINDERPLUS_UPDATE ( ) + AMRFINDERPLUS_RUN ( input, AMRFINDERPLUS_UPDATE.out.db ) +} diff --git a/tests/modules/amrfinderplus/run/nextflow.config b/tests/modules/amrfinderplus/run/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/amrfinderplus/run/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/amrfinderplus/run/test.yml 
b/tests/modules/amrfinderplus/run/test.yml new file mode 100644 index 00000000..d858cb88 --- /dev/null +++ b/tests/modules/amrfinderplus/run/test.yml @@ -0,0 +1,11 @@ +- name: amrfinderplus run test_amrfinderplus_run + command: nextflow run tests/modules/amrfinderplus/run -entry test_amrfinderplus_run -c tests/config/nextflow.config + tags: + - amrfinderplus/run + - amrfinderplus + files: + - path: output/amrfinderplus/amrfinderdb.tar.gz + - path: output/amrfinderplus/test.tsv + md5sum: b4d261ace9be7d013c19d1f5c0005bfe + - path: output/amrfinderplus/versions.yml + md5sum: 642ca04a07d79fe4c4d02348562e3961 diff --git a/tests/modules/amrfinderplus/update/main.nf b/tests/modules/amrfinderplus/update/main.nf new file mode 100644 index 00000000..f3a34ea0 --- /dev/null +++ b/tests/modules/amrfinderplus/update/main.nf @@ -0,0 +1,11 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { AMRFINDERPLUS_UPDATE } from '../../../../modules/amrfinderplus/update/main.nf' + +workflow test_amrfinderplus_update { + + AMRFINDERPLUS_UPDATE ( ) + +} diff --git a/tests/modules/amrfinderplus/update/nextflow.config b/tests/modules/amrfinderplus/update/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/amrfinderplus/update/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/amrfinderplus/update/test.yml b/tests/modules/amrfinderplus/update/test.yml new file mode 100644 index 00000000..7ae89992 --- /dev/null +++ b/tests/modules/amrfinderplus/update/test.yml @@ -0,0 +1,9 @@ +- name: amrfinderplus update test_amrfinderplus_update + command: nextflow run tests/modules/amrfinderplus/update -entry test_amrfinderplus_update -c tests/config/nextflow.config + tags: + - amrfinderplus + - amrfinderplus/update + files: + - path: output/amrfinderplus/amrfinderdb.tar.gz + - path: 
output/amrfinderplus/versions.yml + md5sum: 4db18fa509309db4da0920a7eeaba86c From 2597c31d6bba4a354fc58b535c76bd14aed1c585 Mon Sep 17 00:00:00 2001 From: Mei Wu <25568561+projectoriented@users.noreply.github.com> Date: Mon, 21 Feb 2022 11:49:21 +0100 Subject: [PATCH 13/15] refactored vcfanno (#1266) * final refactor: using tarball instead --- modules/vcfanno/main.nf | 15 +++++++++----- modules/vcfanno/meta.yml | 26 +++++++++++++---------- tests/config/test_data.config | 4 +++- tests/modules/vcfanno/main.nf | 30 +++++++++++++++++++++++---- tests/modules/vcfanno/nextflow.config | 2 +- tests/modules/vcfanno/test.yml | 18 ++++++++++++++-- 6 files changed, 71 insertions(+), 24 deletions(-) diff --git a/modules/vcfanno/main.nf b/modules/vcfanno/main.nf index 77275963..51b1ec5b 100644 --- a/modules/vcfanno/main.nf +++ b/modules/vcfanno/main.nf @@ -9,11 +9,13 @@ process VCFANNO { input: tuple val(meta), path(vcf), path(tbi) - path vcfanno_config + tuple val(meta), path(vcf_uncompressed) + path toml + path resource_dir output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*_annotated.vcf"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,12 +23,15 @@ process VCFANNO { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def input_vcf = vcf_uncompressed ?: vcf """ + ln -sf $resource_dir/* \$(pwd) + vcfanno \\ -p $task.cpus \\ $args \\ - $vcfanno_config \\ - $vcf \\ + $toml \\ + $input_vcf \\ > ${prefix}_annotated.vcf cat <<-END_VERSIONS > versions.yml diff --git a/modules/vcfanno/meta.yml b/modules/vcfanno/meta.yml index bbd2ab16..1c6893ea 100644 --- a/modules/vcfanno/meta.yml +++ b/modules/vcfanno/meta.yml @@ -5,6 +5,8 @@ keywords: - bed - annotate - variant + - lua + - toml tools: - vcfanno: description: annotate a VCF with other VCFs/BEDs/tabixed files @@ -23,19 +25,21 @@ input: - vcf: type: file description: query VCF file - 
pattern: "*.{vcf.gz}" - - vcf.tbi: + pattern: "*.{vcf,vcf.gz}" + - vcf_tabix: type: file - description: query VCF file index - pattern: "*.{vcf.gz.tbi}" - - vcfanno_config: + description: tabix index of query VCF - only needed if vcf is compressed + pattern: "*.vcf.gz.tbi" + - toml: + type: file + description: configuration file + pattern: "*.toml" + - resource_dir: type: file description: | - A simple configuration file is used to specify both the source files - and the set of attributes (in the case of VCF) - or columns (in the case of BED or other tab-delimited formats) - that should be added to the query file. - pattern: "*.{toml}" + This directory contains referenced files in the TOML config, + and the corresponding indices e.g. exac.vcf.gz + exac.vcf.gz.tbi, + with the exception of the lua file. output: - meta: @@ -50,7 +54,7 @@ output: - vcf: type: file description: Annotated VCF file - pattern: "*.{vcf}" + pattern: "*.vcf" authors: - "@projectoriented" diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 165e8449..e28b0cb0 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -154,7 +154,9 @@ params { justhusky_ped = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped" justhusky_minimal_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz" justhusky_minimal_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi" - + + vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz" + vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" } 'pangenome' { pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa" diff --git a/tests/modules/vcfanno/main.nf b/tests/modules/vcfanno/main.nf index c46afa8f..045ffa31 100644 --- a/tests/modules/vcfanno/main.nf +++ b/tests/modules/vcfanno/main.nf @@ -2,18 +2,40 @@
nextflow.enable.dsl = 2 +include { UNTAR } from '../../../modules/untar/main.nf' include { VCFANNO } from '../../../modules/vcfanno/main.nf' workflow test_vcfanno { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'test_compressed', single_end:false ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) ] + + input_2 = [ [ id:'test_compressed', single_end:false ], // meta map + [] ] - toml = file("https://raw.githubusercontent.com/nf-core/test-datasets/8fbd9f99a2feb3f9e39cd3bcdc4a9176a5835673/data/delete_me/vcfanno.toml", - checkIfExists: true) + toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) + resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) - VCFANNO ( input, toml ) + UNTAR ( resource_dir ) + VCFANNO ( input, input_2, toml, UNTAR.out.untar ) } + +workflow test_vcfanno_uncompressed { + + input = [ [ id:'test_uncompressed', single_end:false ], // meta map + [] ,[] ] + + input_2 = [ + [ id:'test_uncompressed', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + + toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) + resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true) + + UNTAR ( resource_dir ) + VCFANNO ( input, input_2, toml, UNTAR.out.untar ) +} \ No newline at end of file diff --git a/tests/modules/vcfanno/nextflow.config b/tests/modules/vcfanno/nextflow.config index 50f50a7a..ac724a73 100644 --- a/tests/modules/vcfanno/nextflow.config +++ b/tests/modules/vcfanno/nextflow.config @@ -1,5 +1,5 @@ process { - + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } \ No newline at end of file diff --git 
a/tests/modules/vcfanno/test.yml b/tests/modules/vcfanno/test.yml index 0b5394d2..eae5d3b0 100644 --- a/tests/modules/vcfanno/test.yml +++ b/tests/modules/vcfanno/test.yml @@ -3,7 +3,21 @@ tags: - vcfanno files: - - path: output/vcfanno/test_annotated.vcf - md5sum: 34259cf6b0a4698a2917ad3554b50c0f + - path: output/untar/versions.yml + md5sum: 6de038155bccbe4d264d09529bf9f4d8 + - path: output/vcfanno/test_compressed_annotated.vcf + md5sum: 90ceb2fd2e06e781846d69c3981db665 - path: output/vcfanno/versions.yml md5sum: 62d13540503b22f04a2280c91942cb03 + +- name: vcfanno test_vcfanno_uncompressed + command: nextflow run tests/modules/vcfanno -entry test_vcfanno_uncompressed -c tests/config/nextflow.config + tags: + - vcfanno + files: + - path: output/untar/versions.yml + md5sum: a0de70274caac0cca31b077a1e7172d9 + - path: output/vcfanno/test_uncompressed_annotated.vcf + md5sum: 90ceb2fd2e06e781846d69c3981db665 + - path: output/vcfanno/versions.yml + md5sum: 300218dad68c3ffcc4783daa4f7c5a43 From 4dbc166a7c30e963511fb5c9870fbcaa158a53a9 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 21 Feb 2022 13:26:51 +0000 Subject: [PATCH 14/15] Change label for rseqc/tin to process_high (#1327) --- modules/rseqc/tin/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rseqc/tin/main.nf b/modules/rseqc/tin/main.nf index da58fc06..67493371 100644 --- a/modules/rseqc/tin/main.nf +++ b/modules/rseqc/tin/main.nf @@ -1,6 +1,6 @@ process RSEQC_TIN { tag "$meta.id" - label 'process_medium' + label 'process_high' conda (params.enable_conda ? "bioconda::rseqc=3.0.1 'conda-forge::r-base>=3.5'" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From 2383d43a0fdfdfb1fe15128ad8723b9f6f40d5cc Mon Sep 17 00:00:00 2001 From: nickhsmith Date: Tue, 22 Feb 2022 14:25:36 +0100 Subject: [PATCH 15/15] update priority (#1334) bumping the base resource label --- modules/gatk4/genotypegvcfs/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/genotypegvcfs/main.nf b/modules/gatk4/genotypegvcfs/main.nf index b596e005..4a42ad0a 100644 --- a/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/gatk4/genotypegvcfs/main.nf @@ -1,6 +1,6 @@ process GATK4_GENOTYPEGVCFS { tag "$meta.id" - label 'process_medium' + label 'process_high' conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?