Add PRINSEQPLUSPLUS (#1481)

* fix: remove left-over unnecessary code

* Add prinseq++

* Remove last todo

* Fix tests due to variability of output FASTQs (reads can be ordered differently between runs)

* Apply suggestions from code review
This commit is contained in:
James A. Fellows Yates 2022-04-03 16:06:22 +02:00 committed by GitHub
parent 67c1bc9568
commit f1c5384c31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 185 additions and 0 deletions

View file

@ -0,0 +1,61 @@
// Runs prinseq++ on the FASTQ file(s) of one sample and collects the
// "good", "single" and "bad" read files, the tool's STDOUT (as a log file)
// and a versions.yml recording the prinseq++ version.
process PRINSEQPLUSPLUS {
    tag "${meta.id}"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
        'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"

    input:
    // meta: sample map (meta.id and meta.single_end are read below)
    // reads: one file when meta.single_end is true, otherwise reads[0] and reads[1] are used
    tuple val(meta), path(reads)

    output:
    // Only good_reads, log and versions are mandatory; the other two may be absent
    tuple val(meta), path("*_good_out*.fastq.gz")  , emit: good_reads
    tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
    tuple val(meta), path("*_bad_out*.fastq.gz")   , optional: true, emit: bad_reads
    tuple val(meta), path("*.log")                 , emit: log
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and output prefix can be overridden through task.ext
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    if (!meta.single_end) {
        // Paired-end: first file goes to -fastq, second to -fastq2
        """
        prinseq++ \\
            -threads ${task.cpus} \\
            -fastq ${reads[0]} \\
            -fastq2 ${reads[1]} \\
            -out_name ${prefix} \\
            -out_gz \\
            -VERBOSE 1 \\
            ${args} \\
            | tee ${prefix}.log
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
        END_VERSIONS
        """
    } else {
        // Single-end: the staged reads are passed to -fastq as-is
        """
        prinseq++ \\
            -threads ${task.cpus} \\
            -fastq ${reads} \\
            -out_name ${prefix} \\
            -out_gz \\
            -VERBOSE 1 \\
            ${args} \\
            | tee ${prefix}.log
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
        END_VERSIONS
        """
    }
}

View file

@ -0,0 +1,60 @@
# Module metadata for PRINSEQPLUSPLUS: describes the wrapped tool and the
# input/output channels declared by the process.
name: "prinseqplusplus"
description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
keywords:
  - fastq
  - fasta
  - filter
  - trim
tools:
  - "prinseqplusplus":
      description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
      homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
      doi: "10.7287/peerj.preprints.27553v1"
      # A real YAML list, not a string that merely looks like one
      licence: ["GPL v2"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end
        data, respectively.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - good_reads:
      type: file
      description: Reads passing filter(s) in gzipped FASTQ format
      # Single-end output has no _R1/_R2 suffix, so match both layouts
      pattern: "*_good_out*.fastq.gz"
  - single_reads:
      type: file
      description: |
        Single reads without the pair passing filter(s) in gzipped FASTQ format
      pattern: "*_single_out_{R1,R2}.fastq.gz"
  - bad_reads:
      type: file
      description: |
        Reads not passing filter(s) in gzipped FASTQ format
      # Single-end output has no _R1/_R2 suffix, so match both layouts
      pattern: "*_bad_out*.fastq.gz"
  - log:
      type: file
      # The process runs prinseq++ with -VERBOSE 1 and tees STDOUT into this file
      description: |
        Verbose level 1 STDOUT information in a log file
      pattern: "*.log"
authors:
  - "@jfy133"

View file

@ -1407,6 +1407,10 @@ preseq/lcextrap:
- modules/preseq/lcextrap/** - modules/preseq/lcextrap/**
- tests/modules/preseq/lcextrap/** - tests/modules/preseq/lcextrap/**
# Path globs covering the prinseqplusplus module and its tests
prinseqplusplus:
- modules/prinseqplusplus/**
- tests/modules/prinseqplusplus/**
prodigal: prodigal:
- modules/prodigal/** - modules/prodigal/**
- tests/modules/prodigal/** - tests/modules/prodigal/**

View file

@ -0,0 +1,24 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PRINSEQPLUSPLUS } from '../../../modules/prinseqplusplus/main.nf'
// Runs PRINSEQPLUSPLUS on a single FASTQ file flagged as single-end.
workflow test_prinseqplusplus_single_end {

    // meta map
    sample_meta = [ id:'test', single_end:true ]

    // One-element list holding the only read file
    fastqs = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]

    PRINSEQPLUSPLUS ( [ sample_meta, fastqs ] )
}
// Runs PRINSEQPLUSPLUS on a pair of FASTQ files flagged as paired-end.
workflow test_prinseqplusplus_paired_end {

    // meta map
    sample_meta = [ id:'test', single_end:false ]

    // Two-element list: first and second read file, in that order
    fastqs = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    PRINSEQPLUSPLUS ( [ sample_meta, fastqs ] )
}

View file

@ -0,0 +1,9 @@
// Process-scope settings used when running the PRINSEQPLUSPLUS module tests.
process {

    // Publish under <outdir>/<name>, where <name> is the last ':'-separated
    // component of the process name, cut at the first '_' and lower-cased.
    publishDir = {
        "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}"
    }

    // Extra prinseq++ option handed to the module through task.ext.args
    withName: 'PRINSEQPLUSPLUS' {
        ext.args = '-lc_entropy=0.8'
    }

}

View file

@ -0,0 +1,27 @@
# Single-end run. The log is checked for the entropy-filter report line;
# all other outputs are listed by path only.
- name: prinseqplusplus test_prinseqplusplus_single_end
  command: nextflow run tests/modules/prinseqplusplus -entry test_prinseqplusplus_single_end -c tests/config/nextflow.config
  tags:
    - prinseqplusplus
  files:
    - path: output/prinseqplusplus/test.log
      contains:
        - 'reads removed by -lc_entropy'
    - path: output/prinseqplusplus/test_bad_out.fastq.gz
    - path: output/prinseqplusplus/test_good_out.fastq.gz
    - path: output/prinseqplusplus/versions.yml
# Paired-end run. The log is checked for the entropy-filter report line;
# the R1/R2 read files and versions.yml are listed by path only.
- name: prinseqplusplus test_prinseqplusplus_paired_end
  command: nextflow run tests/modules/prinseqplusplus -entry test_prinseqplusplus_paired_end -c tests/config/nextflow.config
  tags:
    - prinseqplusplus
  files:
    - path: output/prinseqplusplus/test.log
      contains:
        - 'reads removed by -lc_entropy'
    - path: output/prinseqplusplus/test_bad_out_R1.fastq.gz
    - path: output/prinseqplusplus/test_bad_out_R2.fastq.gz
    - path: output/prinseqplusplus/test_good_out_R1.fastq.gz
    - path: output/prinseqplusplus/test_good_out_R2.fastq.gz
    - path: output/prinseqplusplus/test_single_out_R1.fastq.gz
    - path: output/prinseqplusplus/test_single_out_R2.fastq.gz
    - path: output/prinseqplusplus/versions.yml