enhance module fastp: add save_merged (#598) (#614)

* enhance module fastp: add `save_merged` (#598)

* removed md5sum checks from log and json
This commit is contained in:
Johnathan D 2021-07-23 10:44:00 +01:00 committed by GitHub
parent 3cabc95d0e
commit 6f561b3b41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 12 deletions

View file

@ -21,6 +21,7 @@ process FASTP {
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
val save_trimmed_fail val save_trimmed_fail
val save_merged
output: output:
tuple val(meta), path('*.trim.fastq.gz') , emit: reads tuple val(meta), path('*.trim.fastq.gz') , emit: reads
@ -29,6 +30,7 @@ process FASTP {
tuple val(meta), path('*.log') , emit: log tuple val(meta), path('*.log') , emit: log
path '*.version.txt' , emit: version path '*.version.txt' , emit: version
tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
script: script:
// Added soft-links to original fastqs for consistent naming in MultiQC // Added soft-links to original fastqs for consistent naming in MultiQC
@ -51,6 +53,7 @@ process FASTP {
""" """
} else { } else {
def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
""" """
[ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
[ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
@ -62,6 +65,7 @@ process FASTP {
--json ${prefix}.fastp.json \\ --json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\ --html ${prefix}.fastp.html \\
$fail_fastq \\ $fail_fastq \\
$merge_fastq \\
--thread $task.cpus \\ --thread $task.cpus \\
--detect_adapter_for_pe \\ --detect_adapter_for_pe \\
$options.args \\ $options.args \\

View file

@ -30,7 +30,7 @@ output:
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- reads: - reads:
type: file type: file
description: The trimmed/modified fastq reads description: The trimmed/modified/unmerged fastq reads
pattern: "*trim.fastq.gz" pattern: "*trim.fastq.gz"
- json: - json:
type: file type: file
@ -52,6 +52,10 @@ output:
type: file type: file
description: Reads that failed the preprocessing description: Reads that failed the preprocessing
pattern: "*fail.fastq.gz" pattern: "*fail.fastq.gz"
- reads_merged:
type: file
description: Reads that were successfully merged
pattern: "*.merged.fastq.gz"
authors: authors:
- "@drpatelh" - "@drpatelh"
- "@kevinmenden" - "@kevinmenden"

View file

@ -12,8 +12,9 @@ workflow test_fastp_single_end {
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
] ]
save_trimmed_fail = false save_trimmed_fail = false
save_merged = false
FASTP ( input, save_trimmed_fail ) FASTP ( input, save_trimmed_fail, save_merged )
} }
// //
@ -25,8 +26,9 @@ workflow test_fastp_paired_end {
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
] ]
save_trimmed_fail = false save_trimmed_fail = false
save_merged = false
FASTP ( input, save_trimmed_fail ) FASTP ( input, save_trimmed_fail, save_merged )
} }
// //
@ -37,8 +39,9 @@ workflow test_fastp_single_end_trim_fail {
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
] ]
save_trimmed_fail = true save_trimmed_fail = true
save_merged = false
FASTP ( input, save_trimmed_fail ) FASTP ( input, save_trimmed_fail, save_merged )
} }
// //
@ -50,6 +53,21 @@ workflow test_fastp_paired_end_trim_fail {
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
] ]
save_trimmed_fail = true save_trimmed_fail = true
save_merged = false
FASTP ( input, save_trimmed_fail ) FASTP ( input, save_trimmed_fail, save_merged )
}
//
// Test with paired-end data with merging
//
workflow test_fastp_paired_end_merged {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
]
save_trimmed_fail = false
save_merged = true
FASTP ( input, save_trimmed_fail, save_merged )
} }

View file

@ -81,3 +81,28 @@
md5sum: e62ff0123a74adfc6903d59a449cbdb0 md5sum: e62ff0123a74adfc6903d59a449cbdb0
- path: output/fastp/test_2.fail.fastq.gz - path: output/fastp/test_2.fail.fastq.gz
md5sum: f52309b35a7c15cbd56a9c3906ef98a5 md5sum: f52309b35a7c15cbd56a9c3906ef98a5
- name: fastp test_fastp_paired_end_merged
command: nextflow run tests/modules/fastp -entry test_fastp_paired_end_merged -c tests/config/nextflow.config
tags:
- fastp
files:
- path: output/fastp/test.fastp.html
contains:
- "<div id='After_filtering__merged__quality'>"
- path: output/fastp/test.fastp.json
contains:
- '"merged_and_filtered": {'
- '"total_reads": 75'
- '"total_bases": 13683'
- path: output/fastp/test.fastp.log
contains:
- "Merged and filtered:"
- "total reads: 75"
- "total bases: 13683"
- path: output/fastp/test.merged.fastq.gz
md5sum: ce88539076ced5aff11f866836ea1f40
- path: output/fastp/test_1.trim.fastq.gz
md5sum: 65d75c13abbfbfd993914e1379634100
- path: output/fastp/test_2.trim.fastq.gz
md5sum: 0d87ce4d8ef29fb35f337eb0f6c9fcb4