From 3930ba227bc49541e25272eb9332a10d01a1b271 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 14:10:44 +0200 Subject: [PATCH 1/6] merge snapaligner modes into one --- modules/snapaligner/{paired => align}/main.nf | 7 +-- .../snapaligner/{paired => align}/meta.yml | 6 +-- modules/snapaligner/single/main.nf | 41 ---------------- modules/snapaligner/single/meta.yml | 48 ------------------- tests/modules/snapaligner/align/main.nf | 29 +++++++++++ .../{paired => align}/nextflow.config | 0 tests/modules/snapaligner/align/test.yml | 19 ++++++++ tests/modules/snapaligner/paired/main.nf | 17 ------- tests/modules/snapaligner/paired/test.yml | 9 ---- tests/modules/snapaligner/single/main.nf | 17 ------- .../snapaligner/single/nextflow.config | 5 -- tests/modules/snapaligner/single/test.yml | 9 ---- 12 files changed, 55 insertions(+), 152 deletions(-) rename modules/snapaligner/{paired => align}/main.nf (87%) rename modules/snapaligner/{paired => align}/meta.yml (86%) delete mode 100644 modules/snapaligner/single/main.nf delete mode 100644 modules/snapaligner/single/meta.yml create mode 100644 tests/modules/snapaligner/align/main.nf rename tests/modules/snapaligner/{paired => align}/nextflow.config (100%) create mode 100644 tests/modules/snapaligner/align/test.yml delete mode 100644 tests/modules/snapaligner/paired/main.nf delete mode 100644 tests/modules/snapaligner/paired/test.yml delete mode 100644 tests/modules/snapaligner/single/main.nf delete mode 100644 tests/modules/snapaligner/single/nextflow.config delete mode 100644 tests/modules/snapaligner/single/test.yml diff --git a/modules/snapaligner/paired/main.nf b/modules/snapaligner/align/main.nf similarity index 87% rename from modules/snapaligner/paired/main.nf rename to modules/snapaligner/align/main.nf index 57044893..ef7af561 100644 --- a/modules/snapaligner/paired/main.nf +++ b/modules/snapaligner/align/main.nf @@ -1,4 +1,4 @@ -process SNAPALIGNER_PAIRED { +process SNAPALIGNER_ALIGN { tag '$meta.id' label 'process_high' @@ -21,15 +21,16 @@ process SNAPALIGNER_PAIRED { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def subcmd = meta.single_end ? "single" : "paired" """ mkdir -p index mv $index index/ - snap-aligner paired \\ + snap-aligner ${subcmd} \\ index \\ ${reads.join(" ")} \\ - -o -bam ${prefix}.bam \\ + -o ${prefix}.bam \\ -t ${task.cpus} \\ $args diff --git a/modules/snapaligner/paired/meta.yml b/modules/snapaligner/align/meta.yml similarity index 86% rename from modules/snapaligner/paired/meta.yml rename to modules/snapaligner/align/meta.yml index b19e0174..611b0b71 100644 --- a/modules/snapaligner/paired/meta.yml +++ b/modules/snapaligner/align/meta.yml @@ -1,5 +1,5 @@ -name: "snapaligner_paired" -description: Performs paired end fastq alignment to a fasta reference using SNAP +name: "snapaligner_align" +description: Performs fastq alignment to a fasta reference using SNAP keywords: - alignment - map @@ -22,7 +22,7 @@ input: e.g. [ id:'test', single_end:false ] - reads: type: file - description: List of input fastq files of size 2 for fastq or 1 for bam + description: List of input fastq files of size 2 for paired fastq or 1 for bam or single fastq pattern: "*.{fastq.gz,fq.gz,fastq,fq,bam}" - index: type: file diff --git a/modules/snapaligner/single/main.nf b/modules/snapaligner/single/main.nf deleted file mode 100644 index b13e1153..00000000 --- a/modules/snapaligner/single/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process SNAPALIGNER_SINGLE { - tag '$meta.id' - label 'process_high' - - conda (params.enable_conda ? "bioconda::snap-aligner=2.0.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snap-aligner:2.0.1--hd03093a_1': - 'quay.io/biocontainers/snap-aligner:2.0.1--hd03093a_1' }" - - input: - tuple val(meta), path(reads) - path index - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - mkdir -p index - mv $index index/ - - snap-aligner single \\ - index \\ - ${reads.join(" ")} \\ - -o -bam ${prefix}.bam \\ - -t ${task.cpus} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - snapaligner: \$(snap-aligner 2>&1| head -n 1 | sed 's/^.*version //;s/.\$//') - END_VERSIONS - """ -} diff --git a/modules/snapaligner/single/meta.yml b/modules/snapaligner/single/meta.yml deleted file mode 100644 index e69cc721..00000000 --- a/modules/snapaligner/single/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: "snapaligner_single" -description: Performs single end fastq alignment to a fasta reference using SNAP -keywords: - - alignment - - map - - fastq - - bam - - sam -tools: - - "snapaligner": - description: "Scalable Nucleotide Alignment Program -- a fast and accurate read aligner for high-throughput sequencing data" - homepage: "http://snap.cs.berkeley.edu" - documentation: "https://1drv.ms/b/s!AhuEg_0yZD86hcpblUt-muHKYsG8fA?e=R8ogug" - tool_dev_url: "https://github.com/amplab/snap" - doi: "10.1101/2021.11.23.469039" - licence: "['Apache v2']" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: List of single end input files - pattern: "*.{fastq.gz,fq.gz,fastq,fq,bam}" - - index: - type: file - description: List of SNAP genome index files - pattern: "{Genome,GenomeIndex,GenomeIndexHash,OverflowTable}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Aligned BAM file - pattern: "*.{bam}" - -authors: - - "@matthdsm" diff --git a/tests/modules/snapaligner/align/main.nf b/tests/modules/snapaligner/align/main.nf new file mode 100644 index 00000000..4f5943fe --- /dev/null +++ b/tests/modules/snapaligner/align/main.nf @@ -0,0 +1,29 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' +include { SNAPALIGNER_ALIGN as SNAPALIGNER_SINGLE } from '../../../../modules/snapaligner/align/main.nf' +include { SNAPALIGNER_ALIGN as SNAPALIGNER_PAIRED } from '../../../../modules/snapaligner/align/main.nf' + +workflow test_snapaligner_single { + + input = [ + [ id:'test', single_end:true ], // meta map + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + + SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) + SNAPALIGNER_SINGLE ( input, SNAPALIGNER_INDEX.out.index ) +} + +workflow test_snapaligner_paired { + + input = [ + [ id:'test', single_end:false ], // meta map + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)] + ] + + SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) + SNAPALIGNER_PAIRED ( input, SNAPALIGNER_INDEX.out.index ) +} diff --git a/tests/modules/snapaligner/paired/nextflow.config b/tests/modules/snapaligner/align/nextflow.config similarity index 100% rename from tests/modules/snapaligner/paired/nextflow.config rename to tests/modules/snapaligner/align/nextflow.config diff --git a/tests/modules/snapaligner/align/test.yml b/tests/modules/snapaligner/align/test.yml new file mode 100644 index 00000000..9cc7e230 --- /dev/null +++ b/tests/modules/snapaligner/align/test.yml @@ -0,0 +1,19 @@ +- name: snapaligner align test_snapaligner_single + command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_single -c tests/config/nextflow.config + tags: + - snapaligner/single + - snapaligner + files: + - path: output/snapaligner/test.bam + md5sum: 5d95594e4ef1ee23ce56e6a7cb64f0f2 + - path: output/snapaligner/versions.yml + +- name: snapaligner align test_snapaligner_paired + command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_paired -c tests/config/nextflow.config + tags: + - snapaligner/paired + - snapaligner + files: + - path: output/snapaligner/test.bam + md5sum: a1405da5876f15dbe8a81516b94c2a15 + - path: output/snapaligner/versions.yml diff --git a/tests/modules/snapaligner/paired/main.nf b/tests/modules/snapaligner/paired/main.nf deleted file mode 100644 index b25ca8c2..00000000 --- a/tests/modules/snapaligner/paired/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' -include { SNAPALIGNER_PAIRED } from '../../../../modules/snapaligner/paired/main.nf' - -workflow test_snapaligner_paired { - - input = [ - [ id:'test', single_end:false ], // meta map - [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)] - ] - - SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) - SNAPALIGNER_PAIRED ( input, SNAPALIGNER_INDEX.out.index ) -} diff --git a/tests/modules/snapaligner/paired/test.yml b/tests/modules/snapaligner/paired/test.yml deleted file mode 100644 index 7df1e02b..00000000 --- a/tests/modules/snapaligner/paired/test.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: snapaligner paired test_snapaligner_paired - command: nextflow run tests/modules/snapaligner/paired -entry test_snapaligner_paired -c tests/config/nextflow.config - tags: - - snapaligner - - snapaligner/paired - files: - - path: output/snapaligner/test.bam - md5sum: 2ac92e9539fa246dd6db52b5de56fca5 - - path: output/snapaligner/versions.yml diff --git a/tests/modules/snapaligner/single/main.nf b/tests/modules/snapaligner/single/main.nf deleted file mode 100644 index 616e517a..00000000 --- a/tests/modules/snapaligner/single/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' -include { SNAPALIGNER_SINGLE } from '../../../../modules/snapaligner/single/main.nf' - -workflow test_snapaligner_single { - - input = [ - [ id:'test', single_end:false ], // meta map - [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] - - SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) - SNAPALIGNER_SINGLE ( input, SNAPALIGNER_INDEX.out.index ) -} diff --git a/tests/modules/snapaligner/single/nextflow.config b/tests/modules/snapaligner/single/nextflow.config deleted file mode 100644 index 50f50a7a..00000000 --- a/tests/modules/snapaligner/single/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file diff --git a/tests/modules/snapaligner/single/test.yml b/tests/modules/snapaligner/single/test.yml deleted file mode 100644 index bbcbba1f..00000000 --- a/tests/modules/snapaligner/single/test.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: snapaligner single test_snapaligner_single - command: nextflow run tests/modules/snapaligner/single -entry test_snapaligner_single -c tests/config/nextflow.config - tags: - - snapaligner/single - - snapaligner - files: - - path: output/snapaligner/test.bam - md5sum: 696f7ea8e1aa5f9d7dafb9d0134fe25d - - path: output/snapaligner/versions.yml From 26d2231e854b909fcf0d2a4971c01a7030ae6cf2 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 14:16:38 +0200 Subject: [PATCH 2/6] fix linting --- tests/config/pytest_modules.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index effe6fe2..2c9ea7d4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1763,13 +1763,9 @@ snapaligner/index: - modules/snapaligner/index/** - tests/modules/snapaligner/index/** -snapaligner/paired: - - modules/snapaligner/paired/** - - tests/modules/snapaligner/paired/** - -snapaligner/single: - - modules/snapaligner/single/** - - tests/modules/snapaligner/single/** +snapaligner/align: + - modules/snapaligner/align/** + - tests/modules/snapaligner/align/** snpdists: - modules/snpdists/** From ed09978222ca8fba9b4f9a872b84d8a75acdfc5c Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 6 May 2022 14:16:15 +0200 Subject: [PATCH 3/6] chore: update build number --- modules/sratools/prefetch/main.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 2233d0aa..75fa17a1 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -5,8 +5,8 @@ process SRATOOLS_PREFETCH { conda (params.enable_conda ? 'bioconda::sra-tools=2.11.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sra-tools:2.11.0--pl5262h314213e_0' : - 'quay.io/biocontainers/sra-tools:2.11.0--pl5262h314213e_0' }" + 'https://depot.galaxyproject.org/singularity/sra-tools:2.11.0--pl5321ha49a11a_3' : + 'quay.io/biocontainers/sra-tools:2.11.0--pl5321ha49a11a_3' }" input: tuple val(meta), val(id) @@ -30,7 +30,6 @@ process SRATOOLS_PREFETCH { prefetch \\ $args \\ - --progress \\ $id vdb-validate $id From dd1c66783ae5fd2cd8b416687e465d9f1b558282 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 6 May 2022 17:09:01 +0200 Subject: [PATCH 4/6] refactor: use a template to add retrying --- modules/sratools/prefetch/main.nf | 27 ++------- .../prefetch/templates/retry_with_backoff.sh | 59 +++++++++++++++++++ 2 files changed, 65 insertions(+), 21 deletions(-) create mode 100755 modules/sratools/prefetch/templates/retry_with_backoff.sh diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 75fa17a1..92a66616 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -12,31 +12,16 @@ process SRATOOLS_PREFETCH { tuple val(meta), val(id) output: - tuple val(meta), path("$id"), emit: sra + tuple val(meta), path(id), emit: sra path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when - script: - def args = task.ext.args ?: '' - def config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - """ - eval "\$(vdb-config -o n NCBI_SETTINGS | sed 's/[" ]//g')" - if [[ ! -f "\${NCBI_SETTINGS}" ]]; then - mkdir -p "\$(dirname "\${NCBI_SETTINGS}")" - printf '${config}' > "\${NCBI_SETTINGS}" - fi + shell: + args = task.ext.args ?: '' + args2 = task.ext.args2 ?: '5 1 100' // + config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - prefetch \\ - $args \\ - $id - - vdb-validate $id - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sratools: \$(prefetch --version 2>&1 | grep -Eo '[0-9.]+') - END_VERSIONS - """ + template 'retry_with_backoff.sh' } diff --git a/modules/sratools/prefetch/templates/retry_with_backoff.sh b/modules/sratools/prefetch/templates/retry_with_backoff.sh new file mode 100755 index 00000000..fbcb6532 --- /dev/null +++ b/modules/sratools/prefetch/templates/retry_with_backoff.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -u + +retry_with_backoff() { + local max_attempts=${1} + local delay=${2} + local max_time=${3} + local attempt=1 + local output= + local status= + + # Remove the first three arguments to this function in order to access + # the 'real' command with `${@}`. + shift 3 + + while [ ${attempt} -le ${max_attempts} ]; do + output=$("${@}") + status=${?} + + if [ ${status} -eq 0 ]; then + break + fi + + if [ ${attempt} -lt ${max_attempts} ]; then + echo "Failed attempt ${attempt} of ${max_attempts}. Retrying in ${delay} s." >&2 + sleep ${delay} + elif [ ${attempt} -eq ${max_attempts} ]; then + echo "Failed after ${attempt} attempts." >&2 + return ${status} + fi + + attempt=$(( ${attempt} + 1 )) + delay=$(( ${delay} * 2 )) + if [ ${delay} -ge ${max_time} ]; then + delay=${max_time} + fi + done + + echo "${output}" +} + +eval "$(vdb-config -o n NCBI_SETTINGS | sed 's/[" ]//g')" +if [[ ! -f "${NCBI_SETTINGS}" ]]; then + mkdir -p "$(dirname "${NCBI_SETTINGS}")" + printf '!{config}' > "${NCBI_SETTINGS}" +fi + +retry_with_backoff !{args2} \ + prefetch \ + !{args} \ + !{id} + +vdb-validate !{id} + +cat <<-END_VERSIONS > versions.yml +"!{task.process}": + sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') +END_VERSIONS From ea41c753c0d8348260dd552f58dc0aca7bdd4d51 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Fri, 6 May 2022 17:28:56 +0200 Subject: [PATCH 5/6] fix: remove retry label Call gets retried by bash script --- modules/sratools/prefetch/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/sratools/prefetch/main.nf b/modules/sratools/prefetch/main.nf index 92a66616..3408c3e9 100644 --- a/modules/sratools/prefetch/main.nf +++ b/modules/sratools/prefetch/main.nf @@ -1,7 +1,6 @@ process SRATOOLS_PREFETCH { tag "$id" label 'process_low' - label 'error_retry' conda (params.enable_conda ? 'bioconda::sra-tools=2.11.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From dd458e3388d50212e447e85794f1b027abda5c93 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 6 May 2022 18:57:17 +0200 Subject: [PATCH 6/6] fix tags --- tests/modules/snapaligner/align/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/snapaligner/align/test.yml b/tests/modules/snapaligner/align/test.yml index 9cc7e230..4e4fae61 100644 --- a/tests/modules/snapaligner/align/test.yml +++ b/tests/modules/snapaligner/align/test.yml @@ -1,7 +1,7 @@ - name: snapaligner align test_snapaligner_single command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_single -c tests/config/nextflow.config tags: - - snapaligner/single + - snapaligner/align - snapaligner files: - path: output/snapaligner/test.bam @@ -11,7 +11,7 @@ - name: snapaligner align test_snapaligner_paired command: nextflow run tests/modules/snapaligner/align -entry test_snapaligner_paired -c tests/config/nextflow.config tags: - - snapaligner/paired + - snapaligner/align - snapaligner files: - path: output/snapaligner/test.bam