From 1d6f47ce549f7a540f85c37c61029b25d6e3f149 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 21 Sep 2021 11:17:00 +0100 Subject: [PATCH] Refactor cat/cat module (#723) * Refactor cat/cat module * Change gzip terminology --- modules/cat/cat/main.nf | 36 +++++++++++++++++++++---------- modules/cat/cat/meta.yml | 21 +++++++++--------- tests/modules/cat/cat/main.nf | 39 +++++++++++++++++----------------- tests/modules/cat/cat/test.yml | 26 +++++++++++------------ 4 files changed, 68 insertions(+), 54 deletions(-) diff --git a/modules/cat/cat/main.nf b/modules/cat/cat/main.nf index 6a5ffe83..1c7dbd7c 100644 --- a/modules/cat/cat/main.nf +++ b/modules/cat/cat/main.nf @@ -18,22 +18,36 @@ process CAT_CAT { } input: - path files + path files_in + val file_out output: - path "file*" , emit: file + path "${file_out}*" , emit: file_out path "*.version.txt", emit: version script: - def software = getSoftwareName(task.process) - cpus = Math.floor(task.cpus/2).toInteger() + def file_list = files_in.collect { it.toString() } + if (file_list.size > 1) { - // Use options.suffix if specified, otherwise .out; add .gz if first input file has it - suffix = options.suffix ? "${options.suffix}" : ".out" - suffix += files[0].name =~ /\.gz/ ? '.gz' : '' + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | - """ - cat ${options.args} $files ${options.args2} > file${suffix} - cat --version | grep 'GNU coreutils' | sed 's/cat (GNU coreutils) //' > ${software}.version.txt - """ + def in_zip = file_list[0].endsWith('.gz') + def out_zip = file_out.endsWith('.gz') + def command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + def command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $options.args2" : '' + """ + $command1 \\ + $options.args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > $file_out + + echo \$(pigz --version 2>&1) | sed 's/pigz //g' > pigz.version.txt + """ + } } diff --git a/modules/cat/cat/meta.yml b/modules/cat/cat/meta.yml index 22dc29fc..a1318b19 100644 --- a/modules/cat/cat/meta.yml +++ b/modules/cat/cat/meta.yml @@ -1,33 +1,34 @@ name: cat_cat -description: A module for concatenation of gzipped or uncompressed files, optionally filtering the output +description: A module for concatenation of gzipped or uncompressed files keywords: - concatenate - gzip - - filter + - cat tools: - cat: description: Just concatenation homepage: None - documentation: None + documentation: https://man7.org/linux/man-pages/man1/cat.1.html tool_dev_url: None - doi: "" - licence: "" input: - - files: + - files_in: type: file - description: Gzipped or not files + description: List of compressed / uncompressed files pattern: "*" + - file_out: + type: value + description: Full name of output file with or without .gz extension output: - version: type: file description: File containing version of the pigz software pattern: "*.{version.txt}" - - bam: + - file_out: type: file - description: Concatenated, optionally filtered, file, gzipped if input was, otherwise not - pattern: "file*" + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" authors: - "@erikrikarddaniel" diff --git a/tests/modules/cat/cat/main.nf b/tests/modules/cat/cat/main.nf index abf039b3..a110a8ab 100644 --- a/tests/modules/cat/cat/main.nf +++ b/tests/modules/cat/cat/main.nf @@ -3,44 +3,43 @@ nextflow.enable.dsl = 2 include { CAT_CAT } from '../../../../modules/cat/cat/main.nf' addParams( options: [:] ) -include { CAT_CAT as CAT_CAT_SUFFIX } from '../../../../modules/cat/cat/main.nf' addParams( options: [suffix: ".fna"] ) -workflow test_cat_ungzipped { - +workflow test_cat_unzipped_unzipped { + input = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) ] - CAT_CAT ( input ) + CAT_CAT ( input, 'cat.txt' ) } -workflow test_cat_gzipped { - +workflow test_cat_zipped_zipped { + input = [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) ] - CAT_CAT ( input ) + CAT_CAT ( input, 'cat.txt.gz' ) } -workflow test_cat_ungzipped_fna { - +workflow test_cat_zipped_unzipped { + + input = [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + + CAT_CAT ( input, 'cat.txt' ) +} + +workflow test_cat_unzipped_zipped { + input = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) ] - CAT_CAT_SUFFIX ( input ) -} - -workflow test_cat_gzipped_fna { - - input = [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) - ] - - CAT_CAT_SUFFIX ( input ) + CAT_CAT ( input, 'cat.txt.gz' ) } diff --git a/tests/modules/cat/cat/test.yml b/tests/modules/cat/cat/test.yml index 084408b5..2f234a01 100644 --- a/tests/modules/cat/cat/test.yml +++ b/tests/modules/cat/cat/test.yml @@ -1,33 +1,33 @@ -- name: cat ungzipped - command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped -c tests/config/nextflow.config +- name: cat unzipped unzipped + command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_unzipped -c tests/config/nextflow.config tags: - cat - cat/cat files: - - path: output/cat/file.out + - path: output/cat/cat.txt md5sum: f44b33a0e441ad58b2d3700270e2dbe2 -- name: cat gzipped - command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped -c tests/config/nextflow.config +- name: cat zipped zipped + command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_zipped -c tests/config/nextflow.config tags: - cat - cat/cat files: - - path: output/cat/file.out.gz + - path: output/cat/cat.txt.gz -- name: cat ungzipped suffix - command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped_fna -c tests/config/nextflow.config +- name: cat zipped unzipped + command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_unzipped -c tests/config/nextflow.config tags: - cat - cat/cat files: - - path: output/cat/file.fna - md5sum: f44b33a0e441ad58b2d3700270e2dbe2 + - path: output/cat/cat.txt + md5sum: c439d3b60e7bc03e8802a451a0d9a5d9 -- name: cat gzipped suffix - command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped_fna -c tests/config/nextflow.config +- name: cat unzipped zipped + command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_zipped -c tests/config/nextflow.config tags: - cat - cat/cat files: - - path: output/cat/file.fna.gz + - path: output/cat/cat.txt.gz