Refactor cat/cat module (#723)

* Refactor cat/cat module

* Change gzip terminology
This commit is contained in:
Harshil Patel 2021-09-21 11:17:00 +01:00 committed by GitHub
parent b32c46c6a2
commit 1d6f47ce54
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 54 deletions

View file

@ -18,22 +18,36 @@ process CAT_CAT {
}
input:
path files
path files_in
val file_out
output:
path "file*" , emit: file
path "${file_out}*" , emit: file_out
path "*.version.txt", emit: version
script:
def software = getSoftwareName(task.process)
cpus = Math.floor(task.cpus/2).toInteger()
def file_list = files_in.collect { it.toString() }
if (file_list.size > 1) {
// Use options.suffix if specified, otherwise .out; add .gz if first input file has it
suffix = options.suffix ? "${options.suffix}" : ".out"
suffix += files[0].name =~ /\.gz/ ? '.gz' : ''
// | input | output | command1 | command2 |
// |-----------|------------|----------|----------|
// | gzipped | gzipped | cat | |
// | ungzipped | ungzipped | cat | |
// | gzipped | ungzipped | zcat | |
// | ungzipped | gzipped | cat | pigz |
"""
cat ${options.args} $files ${options.args2} > file${suffix}
cat --version | grep 'GNU coreutils' | sed 's/cat (GNU coreutils) //' > ${software}.version.txt
"""
def in_zip = file_list[0].endsWith('.gz')
def out_zip = file_out.endsWith('.gz')
def command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
def command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $options.args2" : ''
"""
$command1 \\
$options.args \\
${file_list.join(' ')} \\
$command2 \\
> $file_out
echo \$(pigz --version 2>&1) | sed 's/pigz //g' > pigz.version.txt
"""
}
}

View file

@ -1,33 +1,34 @@
name: cat_cat
description: A module for concatenation of gzipped or uncompressed files, optionally filtering the output
description: A module for concatenation of gzipped or uncompressed files
keywords:
- concatenate
- gzip
- filter
- cat
tools:
- cat:
description: Just concatenation
homepage: None
documentation: None
documentation: https://man7.org/linux/man-pages/man1/cat.1.html
tool_dev_url: None
doi: ""
licence: ""
input:
- files:
- files_in:
type: file
description: Gzipped or not files
description: List of compressed / uncompressed files
pattern: "*"
- file_out:
type: value
description: Full name of output file with or without .gz extension
output:
- version:
type: file
description: File containing version of the pigz software
pattern: "*.{version.txt}"
- bam:
- file_out:
type: file
description: Concatenated, optionally filtered, file, gzipped if input was, otherwise not
pattern: "file*"
description: Concatenated file. Will be gzipped if file_out ends with ".gz"
pattern: "${file_out}"
authors:
- "@erikrikarddaniel"

View file

@ -3,44 +3,43 @@
nextflow.enable.dsl = 2
include { CAT_CAT } from '../../../../modules/cat/cat/main.nf' addParams( options: [:] )
include { CAT_CAT as CAT_CAT_SUFFIX } from '../../../../modules/cat/cat/main.nf' addParams( options: [suffix: ".fna"] )
workflow test_cat_ungzipped {
workflow test_cat_unzipped_unzipped {
input = [
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
]
CAT_CAT ( input )
CAT_CAT ( input, 'cat.txt' )
}
workflow test_cat_gzipped {
workflow test_cat_zipped_zipped {
input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
]
CAT_CAT ( input )
CAT_CAT ( input, 'cat.txt.gz' )
}
workflow test_cat_ungzipped_fna {
workflow test_cat_zipped_unzipped {
input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
]
CAT_CAT ( input, 'cat.txt' )
}
workflow test_cat_unzipped_zipped {
input = [
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
]
CAT_CAT_SUFFIX ( input )
}
workflow test_cat_gzipped_fna {
input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
]
CAT_CAT_SUFFIX ( input )
CAT_CAT ( input, 'cat.txt.gz' )
}

View file

@ -1,33 +1,33 @@
- name: cat ungzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped -c tests/config/nextflow.config
- name: cat unzipped unzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_unzipped -c tests/config/nextflow.config
tags:
- cat
- cat/cat
files:
- path: output/cat/file.out
- path: output/cat/cat.txt
md5sum: f44b33a0e441ad58b2d3700270e2dbe2
- name: cat gzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped -c tests/config/nextflow.config
- name: cat zipped zipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_zipped -c tests/config/nextflow.config
tags:
- cat
- cat/cat
files:
- path: output/cat/file.out.gz
- path: output/cat/cat.txt.gz
- name: cat ungzipped suffix
command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped_fna -c tests/config/nextflow.config
- name: cat zipped unzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_unzipped -c tests/config/nextflow.config
tags:
- cat
- cat/cat
files:
- path: output/cat/file.fna
md5sum: f44b33a0e441ad58b2d3700270e2dbe2
- path: output/cat/cat.txt
md5sum: c439d3b60e7bc03e8802a451a0d9a5d9
- name: cat gzipped suffix
command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped_fna -c tests/config/nextflow.config
- name: cat unzipped zipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_zipped -c tests/config/nextflow.config
tags:
- cat
- cat/cat
files:
- path: output/cat/file.fna.gz
- path: output/cat/cat.txt.gz