Upgraded getchromsizes to support meta and output the gzi index (#2046)

* This process is single-threaded, so label it 'process_single' instead of 'process_low'

* Added meta to match the other nf-core modules

* "custom" is not a great name for the versions.yml key; use "getchromsizes" instead

* Added a stub section, since newer modules have a stub

* To match the other modules

* Added myself (@muffato) to the module authors (self promotion)

* Output the gzi file too, if there is one

* More Fasta extensions

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Matthieu Muffato 2022-09-12 13:17:13 +01:00 committed by GitHub
parent 2c70c1c195
commit d75b37fef1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 10 deletions

View file

@ -1,6 +1,6 @@
process CUSTOM_GETCHROMSIZES {
tag "$fasta"
label 'process_low'
label 'process_single'
conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@ -8,12 +8,13 @@ process CUSTOM_GETCHROMSIZES {
'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
input:
path fasta
tuple val(meta), path(fasta)
output:
path '*.sizes' , emit: sizes
path '*.fai' , emit: fai
path "versions.yml", emit: versions
tuple val(meta), path ("*.sizes"), emit: sizes
tuple val(meta), path ("*.fai") , emit: fai
tuple val(meta), path ("*.gzi") , emit: gzi, optional: true
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -26,7 +27,18 @@ process CUSTOM_GETCHROMSIZES {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
stub:
"""
touch ${fasta}.fai
touch ${fasta}.sizes
cat <<-END_VERSIONS > versions.yml
"${task.process}":
getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}

View file

@ -14,12 +14,22 @@ tools:
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: FASTA file
pattern: "*.{fasta}"
pattern: "*.{fa,fasta,fna,fas}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- sizes:
type: file
description: File containing chromosome lengths
@ -28,11 +38,16 @@ output:
type: file
description: FASTA index file
pattern: "*.{fai}"
- gzi:
type: file
description: Optional gzip index file for compressed inputs
pattern: "*.gzi"
- versions:
type: file
description: File containing software version
description: File containing software versions
pattern: "versions.yml"
authors:
- "@tamara-hodgetts"
- "@chris-cheshire"
- "@muffato"

View file

@ -5,8 +5,17 @@ nextflow.enable.dsl = 2
include { CUSTOM_GETCHROMSIZES } from '../../../../modules/custom/getchromsizes/main.nf'
workflow test_custom_getchromsizes {
input = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
CUSTOM_GETCHROMSIZES ( input )
}
// Test entry point: runs CUSTOM_GETCHROMSIZES on the sarscov2 'genome_fasta_gz' test file,
// passed as a [ meta, fasta ] pair.
workflow test_custom_getchromsizes_bgzip {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ]
CUSTOM_GETCHROMSIZES ( input )
}

View file

@ -8,3 +8,19 @@
md5sum: 9da2a56e2853dc8c0b86a9e7229c9fe5
- path: output/custom/genome.fasta.sizes
md5sum: a57c401f27ae5133823fb09fb21c8a3c
- path: output/custom/versions.yml
md5sum: 3e4a23a0852f4ec34296224d87446d9a
- name: custom getchromsizes_bgzip
command: nextflow run ./tests/modules/custom/getchromsizes -entry test_custom_getchromsizes_bgzip -c ./tests/config/nextflow.config -c ./tests/modules/custom/getchromsizes/nextflow.config
tags:
- custom
- custom/getchromsizes
files:
- path: output/custom/genome.fasta.gz.fai
md5sum: 9da2a56e2853dc8c0b86a9e7229c9fe5
- path: output/custom/genome.fasta.gz.gzi
md5sum: 7dea362b3fac8e00956a4952a3d4f474
- path: output/custom/genome.fasta.gz.sizes
md5sum: a57c401f27ae5133823fb09fb21c8a3c
- path: output/custom/versions.yml
md5sum: 22871934dfac30a6109068fd79b2d0ba