diff --git a/modules/fastp/main.nf b/modules/fastp/main.nf index 120392c5..d9134e14 100644 --- a/modules/fastp/main.nf +++ b/modules/fastp/main.nf @@ -28,19 +28,23 @@ process FASTP { def args = task.ext.args ?: '' // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" + // Use single ended for interleaved. Add --interleaved_in in config. if (meta.single_end) { def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' """ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - fastp \\ + cat ${prefix}.fastq.gz \\ + | fastp \\ + --stdin \\ + --stdout \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") diff --git a/modules/fastp/meta.yml b/modules/fastp/meta.yml index 2bd2b1a9..598c3368 100644 --- a/modules/fastp/meta.yml +++ b/modules/fastp/meta.yml @@ -15,7 +15,7 @@ input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. 
[ id:'test', single_end:false ] - reads: type: file diff --git a/tests/config/test_data.config b/tests/config/test_data.config index ebf3b063..ecb69f98 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -61,6 +61,7 @@ params { test_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test_1.fastq.gz" test_2_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test_2.fastq.gz" + test_interleaved_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz" test2_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz" test2_2_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz" test_methylated_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test.methylated_1.fastq.gz" diff --git a/tests/modules/fastp/main.nf b/tests/modules/fastp/main.nf index d1540974..5073673a 100644 --- a/tests/modules/fastp/main.nf +++ b/tests/modules/fastp/main.nf @@ -31,6 +31,19 @@ workflow test_fastp_paired_end { FASTP ( input, save_trimmed_fail, save_merged ) } +// +// Test with interleaved data +// +workflow test_fastp_interleaved { + input = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + save_trimmed_fail = false + save_merged = false + + FASTP ( input, save_trimmed_fail, save_merged ) +} + // // Test with single-end data with saving trimming fails // diff --git a/tests/modules/fastp/nextflow.config b/tests/modules/fastp/nextflow.config index 8730f1c4..18236ca0 100644 --- a/tests/modules/fastp/nextflow.config +++ b/tests/modules/fastp/nextflow.config @@ -2,4 +2,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: '.*test_fastp_interleaved:FASTP' { + ext.args = "--interleaved_in" + } } diff --git a/tests/modules/fastp/test.yml b/tests/modules/fastp/test.yml index 
dd1f62f4..b74ea1e8 100644 --- a/tests/modules/fastp/test.yml +++ b/tests/modules/fastp/test.yml @@ -4,13 +4,19 @@ - fastp files: - path: output/fastp/test.fastp.fastq.gz - md5sum: 4ce5c2b4db68a743cb0635ce7da3b9a4 + contains: + - "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT" + - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE12.922000 K (92.984097%)" - "single end (151 cycles)" - path: output/fastp/test.fastp.json - md5sum: 7ee735cefb67f549dc857eefb9e7f123 + md5sum: 803a024342be986f76486f6ffea15909 - path: output/fastp/test.fastp.log contains: - "Q20 bases: 12922(92.9841%)" @@ -33,9 +39,45 @@ - "No adapter detected for read1" - "Q30 bases: 12281(88.3716%)" - path: output/fastp/test_1.fastp.fastq.gz - md5sum: 4ce5c2b4db68a743cb0635ce7da3b9a4 + contains: + - "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT" + - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE25.719000 K (93.033098%)" + - "paired end (151 cycles + 151 cycles)" + - path: output/fastp/test.fastp.json + md5sum: 5b70f43f33778d278a84b3e9270fa114 + - path: output/fastp/test.fastp.log + contains: + - "Q20 bases: 12922(92.9841%)" + - "reads passed filter: 198" - name: fastp test_fastp_single_end_trim_fail command: nextflow run ./tests/modules/fastp -entry test_fastp_single_end_trim_fail -c ./tests/config/nextflow.config -c ./tests/modules/fastp/nextflow.config @@ -43,15 +85,24 @@ - fastp files: - path: output/fastp/test.fail.fastq.gz - md5sum: b57f2026eb259a0b0c0b3960c270258d + contains: + - "@ERR5069949.885966 
NS500628:121:HK3MMAFX2:4:11610:19682:20132/1 failed_quality_filter" + - "GTCTAATCATAATTTCTTGGTACAGGCTGGTATTGTTCATCTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTT" + - "AAA//E/EAA/E//E//E//E/E//AE/A/E//EAEA///AE//E///E/EEE6EEEAEEA///E/AEE/EAEE/E//E" - path: output/fastp/test.fastp.fastq.gz - md5sum: 4ce5c2b4db68a743cb0635ce7da3b9a4 + contains: + - "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT" + - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE12.922000 K (92.984097%)" - "single end (151 cycles)" - path: output/fastp/test.fastp.json - md5sum: feafc4181a2a61b4b52d9c2b59b419ad + md5sum: b647fa752d3fe7956d17429bfe27d72c - path: output/fastp/test.fastp.log contains: - "Q20 bases: 12922(92.9841%)" @@ -73,14 +124,28 @@ - path: output/fastp/test.fastp.json contains: - '"passed_filter_reads": 198' - - path: output/fastp/test_1.fail.fastq.gz - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/fastp/test_1.fastp.fastq.gz - md5sum: 4ce5c2b4db68a743cb0635ce7da3b9a4 - - path: output/fastp/test_2.fail.fastq.gz - md5sum: 72d0002841967676ac936d08746a9128 + contains: + - "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT" + - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE