From c7f022008c561e70e2ed4a875c17ea1ece109f43 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 21 Mar 2022 15:07:59 +0100 Subject: [PATCH 1/3] Start getting database prep ready --- modules.json | 3 +++ modules/nf-core/modules/untar/main.nf | 36 ++++++++++++++++++++++++++ modules/nf-core/modules/untar/meta.yml | 28 ++++++++++++++++++++ subworkflows/local/db_check.nf | 14 +++++++++- subworkflows/local/input_check.nf | 2 +- 5 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/modules/untar/main.nf create mode 100644 modules/nf-core/modules/untar/meta.yml diff --git a/modules.json b/modules.json index 6a785b8..9a7f1df 100644 --- a/modules.json +++ b/modules.json @@ -23,6 +23,9 @@ }, "multiqc": { "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" + }, + "untar": { + "git_sha": "7ec09d0ef4df89617baacc9b2dafcddb7cd4b05a" } } } diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf new file mode 100644 index 0000000..bbae948 --- /dev/null +++ b/modules/nf-core/modules/untar/main.nf @@ -0,0 +1,36 @@ +process UNTAR { + tag "$archive" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::tar=1.32" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + path archive + + output: + path "$untar" , emit: untar + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + untar = archive.toString() - '.tar.gz' + """ + tar \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml new file mode 100644 index 0000000..e877a97 --- /dev/null +++ b/modules/nf-core/modules/untar/meta.yml @@ -0,0 +1,28 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress +tools: + - untar: + description: | + Extract tar.gz files. 
+ documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - untar: + type: file + description: + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index 909d98f..6f061d7 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -19,8 +19,20 @@ workflow DB_CHECK { .dump(tag: "db_channel_prepped") .set{ dbs } + + parsed_samplesheet + .branch { + untar: it[0]['db_path'].toString().endsWith(".tar.gz") + skip: true + } + .set{ ch_dbs_for_untar } + + UNTAR ( ch_dbs_for_untar.untar ) + + ch_final_dbs = ch_dbs_for_untar.skip.mix( ch_dbs_untarred ) + emit: - dbs // channel: [ val(meta), [ db ] ] + dbs = ch_final_dbs // channel: [ val(meta), [ db ] ] versions = DATABASE_CHECK.out.versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index d66fb3a..938c87f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -35,7 +35,7 @@ workflow INPUT_CHECK { } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { +def create_fastq_channels(LinkedHashMap row) { // create meta map def meta = [:] meta.id = row.sample From 631c115e1003fc7cb9b0c893bf7cde761f4287ef Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Mon, 21 Mar 2022 14:58:19 +0000 Subject: [PATCH 2/3] Adds PoC of untarring system --- conf/modules.config | 2 +- modules.json | 2 +- modules/nf-core/modules/untar/main.nf | 10 +++++----- modules/nf-core/modules/untar/meta.yml | 10 ++++++++++ subworkflows/local/db_check.nf | 9 +++++---- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9e334bc..620ae1d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,7 +82,7 @@ process { publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: 'copy', - pattern: '.{fastq.gz,txt}' + pattern: '*.{fastq.gz,txt}' ] ext.args = { "${meta.db_params}" } ext.when = params.run_kraken2 diff --git a/modules.json b/modules.json index 9a7f1df..96a43d8 100644 --- a/modules.json +++ b/modules.json @@ -25,7 +25,7 @@ "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" }, "untar": { - "git_sha": "7ec09d0ef4df89617baacc9b2dafcddb7cd4b05a" + "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } } } diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index bbae948..dc43fb7 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -8,19 +8,19 @@ process UNTAR { 'biocontainers/biocontainers:v1.2.0_cv1' }" input: - path archive + tuple val(meta), path(archive) output: - path "$untar" , emit: untar - path "versions.yml", emit: versions + tuple val(meta), path("$untar"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + untar = archive.toString() - '.tar.gz' """ tar \\ -xzvf \\ diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml index e877a97..d426919 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ 
b/modules/nf-core/modules/untar/meta.yml @@ -10,11 +10,21 @@ tools: documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - archive: type: file description: File to be untar pattern: "*.{tar}.{gz}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - untar: type: file description: diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index 6f061d7..641108d 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -3,6 +3,7 @@ // include { DATABASE_CHECK } from '../../modules/local/database_check' +include { UNTAR } from '../../modules/nf-core/modules/untar/main' workflow DB_CHECK { take: @@ -17,19 +18,19 @@ workflow DB_CHECK { .dump(tag: "db_split_csv_out") .map { create_db_channels(it) } .dump(tag: "db_channel_prepped") - .set{ dbs } - parsed_samplesheet .branch { - untar: it[0]['db_path'].toString().endsWith(".tar.gz") + untar: it[1].toString().endsWith(".tar.gz") skip: true } .set{ ch_dbs_for_untar } + // TODO Filter to only run UNTAR on DBs of tools actually using? 
+ // TODO make optional whether to save UNTAR ( ch_dbs_for_untar.untar ) - ch_final_dbs = ch_dbs_for_untar.skip.mix( ch_dbs_untarred ) + ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar ) emit: dbs = ch_final_dbs // channel: [ val(meta), [ db ] ] From 07eed435c628ae0518246aa31e87d81fb02c69ad Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 21 Mar 2022 19:54:51 +0100 Subject: [PATCH 3/3] Replace set with explicit assignment --- subworkflows/local/db_check.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index 641108d..890e373 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -19,12 +19,11 @@ workflow DB_CHECK { .map { create_db_channels(it) } .dump(tag: "db_channel_prepped") - parsed_samplesheet + ch_dbs_for_untar = parsed_samplesheet .branch { untar: it[1].toString().endsWith(".tar.gz") skip: true } - .set{ ch_dbs_for_untar } // TODO Filter to only run UNTAR on DBs of tools actually using? // TODO make optional whether to save