Add samtools/collatefastq module (#1536)

* add samtools/collatefastq module

* update yml file

* improve output
This commit is contained in:
Maxime U. Garcia 2022-04-20 10:05:17 +02:00 committed by GitHub
parent 7630e278f3
commit 705f8c9ac4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 131 additions and 0 deletions

View file

@ -0,0 +1,47 @@
// Collates the reads of an alignment file with `samtools collate` and pipes the
// result straight into `samtools fastq`, writing four gzipped FASTQ files:
// <prefix>_1, <prefix>_2, <prefix>_other and <prefix>_singleton.
//
// Inputs : tuple val(meta), path(input)  - sample meta map and the alignment file
// Outputs: reads    - tuple of meta, the _1/_2 files, the _other file and the _singleton file
//          versions - versions.yml recording the samtools version used
// Options: task.ext.args   -> extra arguments for `samtools collate`
//          task.ext.args2  -> extra arguments for `samtools fastq`
//          task.ext.prefix -> output file prefix (defaults to meta.id)
process SAMTOOLS_COLLATEFASTQ {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"

    input:
    tuple val(meta), path(input)

    output:
    //TODO might be good to have ordered output of the fastq files, so we can
    // make sure that we get the right files
    tuple val(meta), path("*_{1,2}.fq.gz"), path("*_other.fq.gz"), path("*_singleton.fq.gz"), emit: reads
    path "versions.yml"                                                                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // args feeds the first command (collate), args2 the second (fastq).
    // args2 must come from task.ext.args2; reading task.ext.args here would
    // hand the collate options to `samtools fastq` as well.
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // `-O` streams the collated records to stdout for the pipe; the trailing
    // `.` is the positional prefix argument of `samtools collate`.
    """
    samtools collate \\
        $args \\
        --threads $task.cpus \\
        -O \\
        $input \\
        . |
    samtools fastq \\
        $args2 \\
        --threads $task.cpus \\
        -1 ${prefix}_1.fq.gz \\
        -2 ${prefix}_2.fq.gz \\
        -0 ${prefix}_other.fq.gz \\
        -s ${prefix}_singleton.fq.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,48 @@
# Module metadata for samtools/collatefastq: describes the wrapped tool and
# the input/output channels of the SAMTOOLS_COLLATEFASTQ process.
name: samtools_collatefastq
description: |
  The module uses collate and then fastq methods from samtools to
  convert a SAM, BAM or CRAM file to FASTQ format
keywords:
  - bam2fq
  - samtools
  - fastq
tools:
  - samtools:
      description: Tools for dealing with SAM, BAM and CRAM files
      homepage: http://www.htslib.org
      documentation: http://www.htslib.org/doc/samtools.html
      tool_dev_url: https://github.com/samtools/samtools
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      # The process always passes -1/-2/-0/-s to samtools fastq, so the four
      # split files are the only layout it emits.
      description: |
        FASTQ files, emitted as a group of 4 files
        (read_1, read_2, other and singleton).
      pattern: "*.fq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@lescai"
  - "@maxulysse"

View file

@ -1567,6 +1567,10 @@ samtools/bam2fq:
- modules/samtools/bam2fq/** - modules/samtools/bam2fq/**
- tests/modules/samtools/bam2fq/** - tests/modules/samtools/bam2fq/**
# Path globs associated with the samtools/collatefastq module and its tests.
samtools/collatefastq:
  - modules/samtools/collatefastq/**
  - tests/modules/samtools/collatefastq/**
# Path globs associated with the samtools/depth module and its tests.
samtools/depth:
  - modules/samtools/depth/**
  - tests/modules/samtools/depth/**

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMTOOLS_COLLATEFASTQ } from '../../../../modules/samtools/collatefastq/main.nf'
// Runs SAMTOOLS_COLLATEFASTQ once on the sarscov2 paired-end BAM taken from
// params.test_data.
workflow test_samtools_collatefastq {

    // meta map describing the sample
    sample_meta = [ id:'test', single_end:false ]

    // alignment file to convert; checkIfExists fails early if it is missing
    bam_file = file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)

    SAMTOOLS_COLLATEFASTQ ( [ sample_meta, bam_file ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish results under <outdir>/<tool>, where <tool> is the lower-cased
    // text before the first underscore of the last ':'-separated component
    // of the process name.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolName   = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolName}"
    }

}

View file

@ -0,0 +1,14 @@
# pytest-workflow case for the samtools/collatefastq module: runs the
# test_samtools_collatefastq entry workflow and checks the four FASTQ outputs.
- name: samtools collatefastq test_samtools_collatefastq
  command: nextflow run ./tests/modules/samtools/collatefastq -entry test_samtools_collatefastq -c ./tests/config/nextflow.config -c ./tests/modules/samtools/collatefastq/nextflow.config
  tags:
    - samtools
    - samtools/collatefastq
  files:
    - path: output/samtools/test_1.fq.gz
      md5sum: 829732de4e937edca90f27b07e5b501a
    - path: output/samtools/test_2.fq.gz
      md5sum: ef27d3809e495620fd93df894280c03a
    # NOTE(review): the "other" and "singleton" files share one checksum;
    # presumably both are empty for this test BAM - confirm.
    - path: output/samtools/test_other.fq.gz
      md5sum: 709872fc2910431b1e8b7074bfe38c67
    - path: output/samtools/test_singleton.fq.gz
      md5sum: 709872fc2910431b1e8b7074bfe38c67