Seqkit pair (#1348)

* add seqkit pair module

* local tests

* local tests

* fix workflow name

* fix workflow name

* fix version indentation

* fix version indentation

* fix version indentation

* fix review comments

* fix review comments

* fix github usernames

* minor fix

* add meta unpaired output

Co-authored-by: Peri <rrx8@cdc.gov>
This commit is contained in:
Sateesh 2022-02-24 09:07:35 -05:00 committed by GitHub
parent 6400317623
commit 4c59984d7b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 126 additions and 0 deletions

View file

@ -0,0 +1,40 @@
// Re-pairs two FastQ files with `seqkit pair`, emitting the matched reads and,
// when the tool is asked to keep them, the reads whose mate was not found.
//
// Input : tuple of a meta map and a two-element list of read files; element 0
//         is passed as read 1 (-1) and element 1 as read 2 (-2).
// Output: reads          - files matching *.paired.fastq.gz
//         unpaired_reads - files matching *.unpaired.fastq.gz (optional)
//         versions       - versions.yml recording the seqkit version
//
// NOTE(review): the output globs only match names ending in `.fastq.gz`; inputs
// named `*.fq` / `*.fq.gz` would presumably yield files the globs miss - confirm.
process SEQKIT_PAIR {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::seqkit=2.1.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0':
        'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.paired.fastq.gz")                  , emit: reads
    tuple val(meta), path("*.unpaired.fastq.gz"), optional: true, emit: unpaired_reads
    path "versions.yml"                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through the pipeline configuration.
    def args = task.ext.args ?: ''
    """
    seqkit \\
        pair \\
        -1 ${reads[0]} \\
        -2 ${reads[1]} \\
        $args \\
        --threads $task.cpus

    # Compress any plain-text FastQ left in the work directory so it matches
    # the *.fastq.gz output globs. The backslash is doubled because this text
    # lives in a Groovy string; the shell receives the usual '\\;' terminator.
    find . -maxdepth 1 -name "*.fastq" -exec gzip {} \\;

    # Keep only the third line of the seqkit banner and strip its 'Version: ' label.
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit | sed '3!d; s/Version: //' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,48 @@
# Module metadata for seqkit/pair: describes the wrapped tool and the
# input/output channels declared by the SEQKIT_PAIR process.
name: seqkit_pair
description: match up paired-end reads from two fastq files
keywords:
  - seqkit
  - pair
tools:
  - seqkit:
      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      homepage: https://bioinf.shenwei.me/seqkit/
      documentation: https://bioinf.shenwei.me/seqkit/usage/
      tool_dev_url: https://github.com/shenwei356/seqkit/
      doi: "10.1371/journal.pone.0163962"
      licence: ['MIT']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of two input paired-end FastQ files, read 1 first and read 2 second.

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reads:
      type: file
      description: Paired fastq reads
      pattern: "*.paired.fastq.gz"
  - unpaired_reads:
      type: file
      description: Unpaired reads (optional)
      pattern: "*.unpaired.fastq.gz"

authors:
  - "@sateeshperi"
  - "@mjcipriano"
  - "@hseabolt"

View file

@ -1445,6 +1445,10 @@ seacr/callpeak:
- modules/seacr/callpeak/**
- tests/modules/seacr/callpeak/**
seqkit/pair:
- modules/seqkit/pair/**
- tests/modules/seqkit/pair/**
seqkit/split2:
- modules/seqkit/split2/**
- tests/modules/seqkit/split2/**

View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SEQKIT_PAIR } from '../../../../modules/seqkit/pair/main.nf'
// Runs SEQKIT_PAIR once on the paired-end SARS-CoV-2 Illumina test reads.
workflow test_seqkit_pair {

    // Shared lookup for the Illumina test files of the sarscov2 dataset.
    illumina = params.test_data['sarscov2']['illumina']

    // Both files must exist up front; order is read 1, then read 2.
    fastq_pair = [
        file(illumina['test_1_fastq_gz'], checkIfExists: true),
        file(illumina['test_2_fastq_gz'], checkIfExists: true)
    ]

    // [ meta map, reads ] tuple in the shape the process input expects.
    input = [ [ id:'test', single_end:false ], fastq_pair ]

    SEQKIT_PAIR ( input )
}

View file

@ -0,0 +1,6 @@
// Test-only process settings for the seqkit/pair module.
process {

    // Passed through to the command line via task.ext.args;
    // `-u` is seqkit pair's switch for also saving unpaired reads.
    ext.args = "-u"

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased text before
    // the first underscore of the unqualified process name
    // (e.g. a process named SEQKIT_PAIR publishes to <outdir>/seqkit).
    publishDir = {
        def unqualified_name = task.process.tokenize(':')[-1]
        def tool_dir         = unqualified_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,12 @@
# pytest-workflow case: runs the test_seqkit_pair entry workflow and checks the
# published files against fixed checksums.
- name: seqkit pair test_seqkit_pair
  command: nextflow run tests/modules/seqkit/pair -entry test_seqkit_pair -c tests/config/nextflow.config
  tags:
    - seqkit/pair
    - seqkit
  files:
    # Re-paired read 1 and read 2.
    - path: output/seqkit/test_1.paired.fastq.gz
      md5sum: fbfe7e8bdbc29abaaf58b6f1a32448e5
    - path: output/seqkit/test_2.paired.fastq.gz
      md5sum: 7d3c0912e5adc2674e8ecc1e647381b3
    # Software version record written by the process.
    - path: output/seqkit/versions.yml
      md5sum: 3086293bc986fc2ece38b1951d090819