diff --git a/modules/faqcs/main.nf b/modules/faqcs/main.nf
new file mode 100644
index 00000000..a03a0150
--- /dev/null
+++ b/modules/faqcs/main.nf
@@ -0,0 +1,103 @@
+process FAQCS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::faqcs=2.10" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/faqcs%3A2.10--r41h9a82719_2' :
+        'quay.io/biocontainers/faqcs:2.10--r41h9a82719_2' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path('*.trimmed.fastq.gz')          , emit: reads
+    tuple val(meta), path('*.stats.txt')                 , emit: stats
+    tuple val(meta), path('*_qc_report.pdf')             , optional:true, emit: statspdf
+    tuple val(meta), path('*.log')                       , emit: log
+    tuple val(meta), path('*.discard.fastq.gz')          , optional:true, emit: reads_fail
+    tuple val(meta), path('*.trimmed.unpaired.fastq.gz') , optional:true, emit: reads_unpaired
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+        FaQCs \\
+            -d . \\
+            -u ${prefix}.fastq.gz \\
+            --prefix ${prefix} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        # Rename the trimmed single-end output (if present) so it matches the '*.trimmed.fastq.gz' glob
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.fastq
+            gzip ${prefix}.trimmed.fastq
+        fi
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    } else {
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+        FaQCs \\
+            -d . \\
+            -1 ${prefix}_1.fastq.gz \\
+            -2 ${prefix}_2.fastq.gz \\
+            --prefix ${prefix} \\
+            -t $task.cpus \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        # Unpaired
+        if [[ -f ${prefix}.unpaired.trimmed.fastq ]]; then
+            # If it is empty remove it
+            if [[ ! -s ${prefix}.unpaired.trimmed.fastq ]]; then
+                rm ${prefix}.unpaired.trimmed.fastq
+            else
+                mv ${prefix}.unpaired.trimmed.fastq ${prefix}.trimmed.unpaired.fastq
+                gzip ${prefix}.trimmed.unpaired.fastq
+            fi
+        fi
+
+        # R1
+        if [[ -f ${prefix}.1.trimmed.fastq ]]; then
+            mv ${prefix}.1.trimmed.fastq ${prefix}_1.trimmed.fastq
+            gzip ${prefix}_1.trimmed.fastq
+        fi
+
+        # R2
+        if [[ -f ${prefix}.2.trimmed.fastq ]]; then
+            mv ${prefix}.2.trimmed.fastq ${prefix}_2.trimmed.fastq
+            gzip ${prefix}_2.trimmed.fastq
+        fi
+
+        # Discarded: Created if --discard argument is passed
+        if [[ -f ${prefix}.discard.trimmed.fastq ]]; then
+            mv ${prefix}.discard.trimmed.fastq ${prefix}.trimmed.discard.fastq
+            gzip ${prefix}.trimmed.discard.fastq
+        fi
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            faqcs: \$(echo \$(FaQCs --version 2>&1) | sed 's/^.*Version: //;' )
+        END_VERSIONS
+        """
+    }
+}
+
diff --git a/modules/faqcs/meta.yml b/modules/faqcs/meta.yml
new file mode 100644
index 00000000..eca35e65
--- /dev/null
+++ b/modules/faqcs/meta.yml
@@ -0,0 +1,67 @@
+name: faqcs
+description: Perform adapter and quality trimming on sequencing reads with reporting
+keywords:
+  - trimming
+  - quality control
+  - fastq
+  - faqcs
+tools:
+  - faqcs:
+      description: |
+        FaQCs combines several features of currently available applications into a single, user-friendly process, and includes additional unique capabilities such as filtering the PhiX control sequences, conversion of FASTQ formats, and multi-threading. The original data and trimmed summaries are reported within a variety of graphics and reports, providing a simple way to do data quality control and assurance.
+      homepage: https://github.com/LANL-Bioinformatics/FaQCs
+      documentation: https://github.com/LANL-Bioinformatics/FaQCs
+      tool_dev_url: https://github.com/LANL-Bioinformatics/FaQCs
+      doi: "https://doi.org/10.1186/s12859-014-0366-2"
+      licence: ['GPLv3 License']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: The trimmed/modified fastq reads
+      pattern: "*trimmed.fastq.gz"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing (Optional with --discard args setting)
+      pattern: "*discard.fastq.gz"
+  - reads_unpaired:
+      type: file
+      description: Reads without matching mates in paired-end files (Optional)
+      pattern: "*trimmed.unpaired.fastq.gz"
+  - stats:
+      type: file
+      description: trimming/qc text stats file
+      pattern: "*.stats.txt"
+  - statspdf:
+      type: file
+      description: trimming/qc pdf report file
+      pattern: "*_qc_report.pdf"
+  - log:
+      type: file
+      description: FaQCs log file (stderr captured from the FaQCs run)
+      pattern: "*.log"
+authors:
+  - "@mjcipriano"
+  - "@sateeshperi"
+  - "@hseabolt"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 05d32cf8..c1c9032e 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -504,6 +504,10 @@ expansionhunter:
   - modules/expansionhunter/**
   - tests/modules/expansionhunter/**
 
+faqcs:
+  - modules/faqcs/**
+  - tests/modules/faqcs/**
+
 fargene:
   - modules/fargene/**
   - tests/modules/fargene/**
diff --git a/tests/modules/faqcs/main.nf b/tests/modules/faqcs/main.nf
new file mode 100644
index 00000000..eba4bb97
--- /dev/null
+++ b/tests/modules/faqcs/main.nf
@@ -0,0 +1,30 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FAQCS } from '../../../modules/faqcs/main.nf'
+
+
+//
+// Test with single-end data
+//
+workflow test_fastp_single_end {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    FAQCS ( input )
+}
+
+//
+// Test with paired-end data
+//
+workflow test_fastp_paired_end {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    FAQCS ( input )
+}
+
diff --git a/tests/modules/faqcs/nextflow.config b/tests/modules/faqcs/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/modules/faqcs/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/faqcs/test.yml b/tests/modules/faqcs/test.yml
new file mode 100644
index 00000000..47f973f3
--- /dev/null
+++ b/tests/modules/faqcs/test.yml
@@ -0,0 +1,31 @@
+- name: faqcs test_fastp_single_end
+  command: nextflow run tests/modules/faqcs -entry test_fastp_single_end -c tests/config/nextflow.config
+  tags:
+    - faqcs
+  files:
+    - path: output/faqcs/test.fastp.log
+      md5sum: be79dc893f87de1f82faf749cdfb848c
+    - path: output/faqcs/test.stats.txt
+      md5sum: ea20e93706b2e4c676004253baa3cec6
+    - path: output/faqcs/test.trimmed.fastq.gz
+      md5sum: 875863b402f67403dac63ef59b9c9a8a
+    - path: output/faqcs/test_qc_report.pdf
+    - path: output/faqcs/versions.yml
+      md5sum: 2a38d7e7ab5299336e9669c393c9da6c
+
+- name: faqcs test_fastp_paired_end
+  command: nextflow run tests/modules/faqcs -entry test_fastp_paired_end -c tests/config/nextflow.config
+  tags:
+    - faqcs
+  files:
+    - path: output/faqcs/test.fastp.log
+      md5sum: be79dc893f87de1f82faf749cdfb848c
+    - path: output/faqcs/test.stats.txt
+      md5sum: 9a693f8af94ab8c485519d9a523aa622
+    - path: output/faqcs/test_1.trimmed.fastq.gz
+      md5sum: 875863b402f67403dac63ef59b9c9a8a
+    - path: output/faqcs/test_2.trimmed.fastq.gz
+      md5sum: 375aeb74819ca3d72203135ac80df78c
+    - path: output/faqcs/test_qc_report.pdf
+    - path: output/faqcs/versions.yml
+      md5sum: 208d54c0cf6dfc54e719b81b990afac9