1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 09:19:54 +00:00

Merge pull request #30 from nf-core/database-untar

Adds the ability to conditionally untar database TAR files that contain DB directories
This commit is contained in:
James A. Fellows Yates 2022-03-21 19:56:22 +01:00 committed by GitHub
commit cd9e3c604c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 92 additions and 4 deletions

View file

@ -91,7 +91,7 @@ process {
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}" },
mode: 'copy',
pattern: '.{fastq.gz,txt}'
pattern: '*.{fastq.gz,txt}'
]
ext.args = { "${meta.db_params}" }
ext.prefix = { "${meta.id}-${meta.db_name}" }

View file

@ -24,6 +24,8 @@
"multiqc": {
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
},
"untar": {
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
},
"porechop": {
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
}

36
modules/nf-core/modules/untar/main.nf generated Normal file
View file

@ -0,0 +1,36 @@
// Extracts a gzip-compressed tar archive into the task work directory.
//
// Input:  tuple val(meta), path(archive)  - meta is passed through unchanged;
//                                           archive is handed to `tar -xzvf`.
// Output: untar    - tuple of meta and the path named after the archive with
//                    '.tar.gz' removed (so the archive is expected to unpack
//                    to an entry of that name)
//         versions - versions.yml recording the GNU tar version
//
// task.ext.args is inserted before the archive name, task.ext.args2 after it.
process UNTAR {
    tag "$archive"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::tar=1.32" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
        'biocontainers/biocontainers:v1.2.0_cv1' }"

    input:
    tuple val(meta), path(archive)

    output:
    tuple val(meta), path("$untar"), emit: untar
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args  = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    // Deliberately assigned without `def`: the output declaration above
    // interpolates "$untar", so the variable must not be script-local.
    // Groovy's String minus removes the first occurrence of '.tar.gz'.
    untar     = archive.toString() - '.tar.gz'
    """
    tar \\
        -xzvf \\
        $args \\
        $archive \\
        $args2

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
    END_VERSIONS
    """
}

38
modules/nf-core/modules/untar/meta.yml generated Normal file
View file

@ -0,0 +1,38 @@
name: untar
description: Extract files.
keywords:
- untar
- uncompress
tools:
- untar:
description: |
Extract tar.gz files.
documentation: https://www.gnu.org/software/tar/manual/
licence: ["GPL-3.0-or-later"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- archive:
type: file
description: File to be untarred
pattern: "*.{tar}.{gz}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- untar:
type: file
description: Extracted contents of the archive, named after the archive without its .tar.gz suffix
pattern: "*.*"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@joseespinosa"
- "@drpatelh"

View file

@ -3,6 +3,7 @@
//
include { DATABASE_CHECK } from '../../modules/local/database_check'
include { UNTAR } from '../../modules/nf-core/modules/untar/main'
workflow DB_CHECK {
take:
@ -17,10 +18,21 @@ workflow DB_CHECK {
.dump(tag: "db_split_csv_out")
.map { create_db_channels(it) }
.dump(tag: "db_channel_prepped")
.set{ dbs }
ch_dbs_for_untar = parsed_samplesheet
.branch {
untar: it[1].toString().endsWith(".tar.gz")
skip: true
}
// TODO Filter to only run UNTAR on DBs of tools actually using?
// TODO make optional whether to save
UNTAR ( ch_dbs_for_untar.untar )
ch_final_dbs = ch_dbs_for_untar.skip.mix( UNTAR.out.untar )
emit:
dbs // channel: [ val(meta), [ db ] ]
dbs = ch_final_dbs // channel: [ val(meta), [ db ] ]
versions = DATABASE_CHECK.out.versions // channel: [ versions.yml ]
}

View file

@ -42,7 +42,7 @@ workflow INPUT_CHECK {
}
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
def create_fastq_channel(LinkedHashMap row) {
def create_fastq_channels(LinkedHashMap row) {
// create meta map
def meta = [:]
meta.id = row.sample