Add nf-core UNTAR module and untar '.tar.gz' databases in DB_CHECK.

Review notes (text before the first 'diff --git' header is ignored by patch tools):
  * modules.json: the added "untar" entry is now closed with '},' so the
    resulting file is valid JSON (hunk header updated to +24,9).
  * subworkflows/local/db_check.nf: the emitted 'versions' channel now also
    carries the versions.yml produced by UNTAR.
  * NOTE(review): leading whitespace of context lines was reconstructed; if the
    patch does not apply cleanly, retry with whitespace-insensitive matching.
    The post-image blob ids on the 'index' lines of modules.json and
    db_check.nf predate the two fixes above and should be regenerated.

diff --git a/conf/modules.config b/conf/modules.config
index c09a011..ab8f021 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -91,7 +91,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: 'copy',
-            pattern: '.{fastq.gz,txt}'
+            pattern: '*.{fastq.gz,txt}'
         ]
         ext.args = { "${meta.db_params}" }
         ext.prefix = { "${meta.id}-${meta.db_name}" }
diff --git a/modules.json b/modules.json
index 284cf13..673a69b 100644
--- a/modules.json
+++ b/modules.json
@@ -24,6 +24,9 @@
             "multiqc": {
                 "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
             },
+            "untar": {
+                "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
+            },
             "porechop": {
                 "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
             }
diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf
new file mode 100644
index 0000000..dc43fb7
--- /dev/null
+++ b/modules/nf-core/modules/untar/main.nf
@@ -0,0 +1,36 @@
+process UNTAR {
+    tag "$archive"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::tar=1.32" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
+        'biocontainers/biocontainers:v1.2.0_cv1' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("$untar"), emit: untar
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args  = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    untar     = archive.toString() - '.tar.gz'
+    """
+    tar \\
+        -xzvf \\
+        $args \\
+        $archive \\
+        $args2 \\
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml
new file mode 100644
index 0000000..d426919
--- /dev/null
+++ b/modules/nf-core/modules/untar/meta.yml
@@ -0,0 +1,38 @@
+name: untar
+description: Extract files.
+keywords:
+  - untar
+  - uncompress
+tools:
+  - untar:
+      description: |
+        Extract tar.gz files.
+      documentation: https://www.gnu.org/software/tar/manual/
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: File to be untar
+      pattern: "*.{tar}.{gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - untar:
+      type: file
+      description:
+      pattern: "*.*"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf
index 909d98f..890e373 100644
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@@ -3,6 +3,7 @@
 //
 
 include { DATABASE_CHECK } from '../../modules/local/database_check'
+include { UNTAR          } from '../../modules/nf-core/modules/untar/main'
 
 workflow DB_CHECK {
     take:
@@ -17,10 +18,21 @@ workflow DB_CHECK {
         .dump(tag: "db_split_csv_out")
         .map { create_db_channels(it) }
         .dump(tag: "db_channel_prepped")
-        .set{ dbs }
+
+    ch_dbs_for_untar = parsed_samplesheet
+        .branch {
+            untar: it[1].toString().endsWith(".tar.gz")
+            skip: true
+        }
+
+    // TODO Filter to only run UNTAR on DBs of tools actually using?
+    // TODO make optional whether to save
+    UNTAR ( ch_dbs_for_untar.untar )
+
+    ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar )
 
     emit:
-    dbs                                    // channel: [ val(meta), [ db ] ]
-    versions = DATABASE_CHECK.out.versions // channel: [ versions.yml ]
+    dbs = ch_final_dbs                     // channel: [ val(meta), [ db ] ]
+    versions = DATABASE_CHECK.out.versions.mix( UNTAR.out.versions.first() ) // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 2e30bcc..67dadc2 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -42,7 +42,7 @@ workflow INPUT_CHECK {
 }
 
 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
-def create_fastq_channel(LinkedHashMap row) {
+def create_fastq_channels(LinkedHashMap row) {
     // create meta map
     def meta = [:]
     meta.id = row.sample