Fix duplicated input in single-end fastp command (#2034)

* Update main.nf

* Update meta.yml

* Re-add logos, as they are not staged in a way that works with MultiQC config files

* Removes duplicate input to single-end FASTP

* Fix md5sums

* Separate interleaved from normal SE command to continue supporting `split_by_lines`

* Simplify save_trimmed_fail

* Update modules/fastp/meta.yml
This commit is contained in:
James A. Fellows Yates 2022-09-09 11:08:52 +02:00 committed by GitHub
parent 65800cee75
commit 2c70c1c195
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 11 deletions

View file

@ -26,16 +26,15 @@ process FASTP {
script:
def args = task.ext.args ?: ''
// Added soft-links to original fastqs for consistent naming in MultiQC
def prefix = task.ext.prefix ?: "${meta.id}"
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config.
if (meta.single_end) {
def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : ''
if ( task.ext.args?.contains('--interleaved_in') ) {
"""
[ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
cat ${prefix}.fastq.gz \\
| fastp \\
--stdin \\
fastp \\
--stdout \\
--in1 ${prefix}.fastq.gz \\
--thread $task.cpus \\
@ -45,13 +44,33 @@ process FASTP {
$args \\
2> ${prefix}.fastp.log \\
| gzip -c > ${prefix}.fastp.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
END_VERSIONS
"""
} else if (meta.single_end) {
"""
[ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
fastp \\
--stdout \\
--in1 ${prefix}.fastq.gz \\
--out1 ${prefix}.fastp.fastq.gz \\
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
END_VERSIONS
"""
} else {
def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
"""
[ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz

View file

@ -21,7 +21,8 @@ input:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
respectively. To process interleaved paired-end data, supply it as single-end data
and add `--interleaved_in` to the module's `ext.args` in your `modules.conf`.
- save_trimmed_fail:
type: boolean
description: Specify true to save reads that failed to pass trimming thresholds; the output files end in `*.fail.fastq.gz`

View file

@ -16,7 +16,7 @@
- "Q20 bases:</td><td class='col2'>12.922000 K (92.984097%)"
- "single end (151 cycles)"
- path: output/fastp/test.fastp.json
md5sum: 803a024342be986f76486f6ffea15909
md5sum: 2616b6791fd89fb1cc2d16a73b9463b0
- path: output/fastp/test.fastp.log
contains:
- "Q20 bases: 12922(92.9841%)"
@ -73,7 +73,7 @@
- "Q20 bases:</td><td class='col2'>25.719000 K (93.033098%)"
- "paired end (151 cycles + 151 cycles)"
- path: output/fastp/test.fastp.json
md5sum: 5b70f43f33778d278a84b3e9270fa114
md5sum: 25ec85e3534f380ca2109c894671f1ed
- path: output/fastp/test.fastp.log
contains:
- "Q20 bases: 12922(92.9841%)"
@ -102,7 +102,7 @@
- "Q20 bases:</td><td class='col2'>12.922000 K (92.984097%)"
- "single end (151 cycles)"
- path: output/fastp/test.fastp.json
md5sum: b647fa752d3fe7956d17429bfe27d72c
md5sum: 7e10b0b12fab5cff620fdeb1a32392f8
- path: output/fastp/test.fastp.log
contains:
- "Q20 bases: 12922(92.9841%)"