Refactor cat/cat module (#723)

* Refactor cat/cat module

* Change gzip terminology
This commit is contained in:
Harshil Patel 2021-09-21 11:17:00 +01:00 committed by GitHub
parent b32c46c6a2
commit 1d6f47ce54
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 54 deletions

View file

@ -18,22 +18,36 @@ process CAT_CAT {
} }
input: input:
path files path files_in
val file_out
output: output:
path "file*" , emit: file path "${file_out}*" , emit: file_out
path "*.version.txt", emit: version path "*.version.txt", emit: version
script: script:
def software = getSoftwareName(task.process) def file_list = files_in.collect { it.toString() }
cpus = Math.floor(task.cpus/2).toInteger() if (file_list.size > 1) {
// Use options.suffix if specified, otherwise .out; add .gz if first input file has it // | input | output | command1 | command2 |
suffix = options.suffix ? "${options.suffix}" : ".out" // |-----------|------------|----------|----------|
suffix += files[0].name =~ /\.gz/ ? '.gz' : '' // | gzipped | gzipped | cat | |
// | ungzipped | ungzipped | cat | |
// | gzipped | ungzipped | zcat | |
// | ungzipped | gzipped | cat | pigz |
def in_zip = file_list[0].endsWith('.gz')
def out_zip = file_out.endsWith('.gz')
def command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
def command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $options.args2" : ''
""" """
cat ${options.args} $files ${options.args2} > file${suffix} $command1 \\
cat --version | grep 'GNU coreutils' | sed 's/cat (GNU coreutils) //' > ${software}.version.txt $options.args \\
${file_list.join(' ')} \\
$command2 \\
> $file_out
echo \$(pigz --version 2>&1) | sed 's/pigz //g' > pigz.version.txt
""" """
} }
}

View file

@ -1,33 +1,34 @@
name: cat_cat name: cat_cat
description: A module for concatenation of gzipped or uncompressed files, optionally filtering the output description: A module for concatenation of gzipped or uncompressed files
keywords: keywords:
- concatenate - concatenate
- gzip - gzip
- filter - cat
tools: tools:
- cat: - cat:
description: Just concatenation description: Just concatenation
homepage: None homepage: None
documentation: None documentation: https://man7.org/linux/man-pages/man1/cat.1.html
tool_dev_url: None tool_dev_url: None
doi: ""
licence: ""
input: input:
- files: - files_in:
type: file type: file
description: Gzipped or not files description: List of compressed / uncompressed files
pattern: "*" pattern: "*"
- file_out:
type: value
description: Full name of output file with or without .gz extension
output: output:
- version: - version:
type: file type: file
description: File containing version of the pigz software description: File containing version of the pigz software
pattern: "*.{version.txt}" pattern: "*.{version.txt}"
- bam: - file_out:
type: file type: file
description: Concatenated, optionally filtered, file, gzipped if input was, otherwise not description: Concatenated file. Will be gzipped if file_out ends with ".gz"
pattern: "file*" pattern: "${file_out}"
authors: authors:
- "@erikrikarddaniel" - "@erikrikarddaniel"

View file

@ -3,44 +3,43 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { CAT_CAT } from '../../../../modules/cat/cat/main.nf' addParams( options: [:] ) include { CAT_CAT } from '../../../../modules/cat/cat/main.nf' addParams( options: [:] )
include { CAT_CAT as CAT_CAT_SUFFIX } from '../../../../modules/cat/cat/main.nf' addParams( options: [suffix: ".fna"] )
workflow test_cat_ungzipped { workflow test_cat_unzipped_unzipped {
input = [ input = [
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
] ]
CAT_CAT ( input ) CAT_CAT ( input, 'cat.txt' )
} }
workflow test_cat_gzipped { workflow test_cat_zipped_zipped {
input = [ input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
] ]
CAT_CAT ( input ) CAT_CAT ( input, 'cat.txt.gz' )
} }
workflow test_cat_ungzipped_fna { workflow test_cat_zipped_unzipped {
input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
]
CAT_CAT ( input, 'cat.txt' )
}
workflow test_cat_unzipped_zipped {
input = [ input = [
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
] ]
CAT_CAT_SUFFIX ( input ) CAT_CAT ( input, 'cat.txt.gz' )
}
workflow test_cat_gzipped_fna {
input = [
file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
]
CAT_CAT_SUFFIX ( input )
} }

View file

@ -1,33 +1,33 @@
- name: cat ungzipped - name: cat unzipped unzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped -c tests/config/nextflow.config command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_unzipped -c tests/config/nextflow.config
tags: tags:
- cat - cat
- cat/cat - cat/cat
files: files:
- path: output/cat/file.out - path: output/cat/cat.txt
md5sum: f44b33a0e441ad58b2d3700270e2dbe2 md5sum: f44b33a0e441ad58b2d3700270e2dbe2
- name: cat gzipped - name: cat zipped zipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped -c tests/config/nextflow.config command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_zipped -c tests/config/nextflow.config
tags: tags:
- cat - cat
- cat/cat - cat/cat
files: files:
- path: output/cat/file.out.gz - path: output/cat/cat.txt.gz
- name: cat ungzipped suffix - name: cat zipped unzipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_ungzipped_fna -c tests/config/nextflow.config command: nextflow run ./tests/modules/cat/cat -entry test_cat_zipped_unzipped -c tests/config/nextflow.config
tags: tags:
- cat - cat
- cat/cat - cat/cat
files: files:
- path: output/cat/file.fna - path: output/cat/cat.txt
md5sum: f44b33a0e441ad58b2d3700270e2dbe2 md5sum: c439d3b60e7bc03e8802a451a0d9a5d9
- name: cat gzipped suffix - name: cat unzipped zipped
command: nextflow run ./tests/modules/cat/cat -entry test_cat_gzipped_fna -c tests/config/nextflow.config command: nextflow run ./tests/modules/cat/cat -entry test_cat_unzipped_zipped -c tests/config/nextflow.config
tags: tags:
- cat - cat
- cat/cat - cat/cat
files: files:
- path: output/cat/file.fna.gz - path: output/cat/cat.txt.gz