From c1eb9cce44e0b5cdab6672ebfa2f8e8842d2aa86 Mon Sep 17 00:00:00 2001
From: alyssa-ab <99927286+alyssa-ab@users.noreply.github.com>
Date: Fri, 17 Jun 2022 08:18:15 -0500
Subject: [PATCH] Trimmomatic (#1757)

* Trimmomatic main only first draft

* Add test files

* SE PE Adjustment

* Remove extra reads input

* chore: Remove TODOs

* Apply suggestions from code review

Co-authored-by: Edmund Miller

* fix(trimmomatic): Handle SE output correctly

Since there's never going to be unpaired reads for SE reads we can get
away with it for SE

* fix(trimmomatic): Use correct elvis operator to handle logic

* fix(trimmomatic): Add hack to work with SE and PE reads

* Update test.yml

* use the PE and SE trimming correctly

* Made user set adaptors

* Add documentation

* test(trimmomatic): Add files to pytest_modules

* test(trimmomatic): Update name of failing test

Co-authored-by: Edmund Miller
Co-authored-by: Edmund Miller
---
 modules/trimmomatic/main.nf               | 46 ++++++++++++++++++++
 modules/trimmomatic/meta.yml              | 53 +++++++++++++++++++++++
 tests/config/pytest_modules.yml           |  4 ++
 tests/modules/trimmomatic/main.nf         | 42 ++++++++++++++++++
 tests/modules/trimmomatic/nextflow.config | 12 +++++
 tests/modules/trimmomatic/test.yml        | 26 +++++++++++
 6 files changed, 183 insertions(+)
 create mode 100644 modules/trimmomatic/main.nf
 create mode 100644 modules/trimmomatic/meta.yml
 create mode 100644 tests/modules/trimmomatic/main.nf
 create mode 100644 tests/modules/trimmomatic/nextflow.config
 create mode 100644 tests/modules/trimmomatic/test.yml

diff --git a/modules/trimmomatic/main.nf b/modules/trimmomatic/main.nf
new file mode 100644
index 00000000..216fa06f
--- /dev/null
+++ b/modules/trimmomatic/main.nf
@@ -0,0 +1,46 @@
+process TRIMMOMATIC {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::trimmomatic=0.39" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trimmomatic:0.39--hdfd78af_2':
+        'quay.io/biocontainers/trimmomatic:0.39--hdfd78af_2' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.paired.trim*.fastq.gz")   , emit: trimmed_reads
+    tuple val(meta), path("*.unpaired.trim_*.fastq.gz"), optional:true, emit: unpaired_reads
+    tuple val(meta), path("*.log")                     , emit: log
+    path "versions.yml"                                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def trimmed = meta.single_end ? "SE" : "PE" // Trimmomatic run mode, passed as its first argument
+    def output = meta.single_end ?
+        "${prefix}.SE.paired.trim.fastq.gz" // HACK: the single SE output is named ".paired" so it matches the trimmed_reads glob
+        : "${prefix}.paired.trim_1.fastq.gz ${prefix}.unpaired.trim_1.fastq.gz ${prefix}.paired.trim_2.fastq.gz ${prefix}.unpaired.trim_2.fastq.gz"
+    // TODO Give better error output
+    def qual_trim = task.ext.args2 ?: '' // trimming steps (e.g. ILLUMINACLIP, SLIDINGWINDOW), set by the user via ext.args2
+    """
+    trimmomatic \\
+        $trimmed \\
+        -threads $task.cpus \\
+        -trimlog ${prefix}.log \\
+        $reads \\
+        $output \\
+        $qual_trim \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trimmomatic: \$(trimmomatic -version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/trimmomatic/meta.yml b/modules/trimmomatic/meta.yml
new file mode 100644
index 00000000..67859931
--- /dev/null
+++ b/modules/trimmomatic/meta.yml
@@ -0,0 +1,53 @@
+name: "trimmomatic"
+description: Performs quality and adapter trimming on paired end and single end reads
+keywords:
+  - trimming
+  - adapter trimming
+  - quality trimming
+
+tools:
+  - "trimmomatic":
+      description: "A flexible read trimming tool for Illumina NGS data"
+      homepage: "http://www.usadellab.org/cms/?page=trimmomatic"
+      documentation: "https://github.com/usadellab/Trimmomatic"
+      tool_dev_url: "https://github.com/usadellab/Trimmomatic"
+      doi: "10.1093/bioinformatics/btu170"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        Input FastQ files of size 1 or 2 for single-end and paired-end data, respectively.
+      pattern: "*.fastq.gz"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - trimmed_reads:
+      type: file
+      description: Trimmed reads that kept their mate (paired-end) or all surviving reads (single-end)
+      pattern: "*.paired.trim*.fastq.gz"
+  - unpaired_reads:
+      type: file
+      description: Trimmed reads whose mate was discarded (paired-end data only)
+      pattern: "*.unpaired.trim_*.fastq.gz"
+  - log:
+      type: file
+      description: Trimmomatic trim log (written via -trimlog)
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@alyssa-ab"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 8f52a057..198785e6 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2079,6 +2079,10 @@ trimgalore:
   - modules/trimgalore/**
   - tests/modules/trimgalore/**
 
+trimmomatic:
+  - modules/trimmomatic/**
+  - tests/modules/trimmomatic/**
+
 ucsc/bed12tobigbed:
   - modules/ucsc/bed12tobigbed/**
   - tests/modules/ucsc/bed12tobigbed/**
diff --git a/tests/modules/trimmomatic/main.nf b/tests/modules/trimmomatic/main.nf
new file mode 100644
index 00000000..bd87f1b6
--- /dev/null
+++ b/tests/modules/trimmomatic/main.nf
@@ -0,0 +1,42 @@
+nextflow.enable.dsl = 2
+
+include {
+    TRIMMOMATIC as TRIMMOMATIC_SE
+    TRIMMOMATIC as TRIMMOMATIC_PE
+    TRIMMOMATIC
+} from '../../../modules/trimmomatic/main.nf'
+
+//
+// Test with single-end data
+//
+workflow test_trimmomatic_single_end {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    TRIMMOMATIC_SE ( input )
+}
+
+//
+// Test with paired-end data
+//
+workflow test_trimmomatic_paired_end {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    TRIMMOMATIC_PE ( input )
+}
+
+//
+// Failing test with no adaptor
+//
+workflow test_trimmomatic_no_adaptor {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+
+    TRIMMOMATIC ( input )
+}
diff --git a/tests/modules/trimmomatic/nextflow.config b/tests/modules/trimmomatic/nextflow.config
new file mode 100644
index 00000000..98833e7e
--- /dev/null
+++ b/tests/modules/trimmomatic/nextflow.config
@@ -0,0 +1,12 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: TRIMMOMATIC_SE {
+        ext.args2 = 'ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
+    }
+
+    withName: TRIMMOMATIC_PE {
+        ext.args2 = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
+    }
+}
diff --git a/tests/modules/trimmomatic/test.yml b/tests/modules/trimmomatic/test.yml
new file mode 100644
index 00000000..12fe44fa
--- /dev/null
+++ b/tests/modules/trimmomatic/test.yml
@@ -0,0 +1,26 @@
+- name: trimmomatic single-end
+  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_single_end -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
+  tags:
+    - "trimmomatic"
+  files:
+    - path: "output/trimmomatic/test.SE.paired.trim.fastq.gz"
+    - path: "output/trimmomatic/test.log"
+      md5sum: e4c3f619e9b0e26847f8f3e3d9af319b
+
+- name: trimmomatic paired-end
+  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
+  tags:
+    - "trimmomatic"
+  files:
+    - path: "output/trimmomatic/test.log"
+      md5sum: 9629761761a34576b3484bf4174f681f
+    - path: "output/trimmomatic/test.paired.trim_1.fastq.gz"
+    - path: "output/trimmomatic/test.unpaired.trim_1.fastq.gz"
+    - path: "output/trimmomatic/test.paired.trim_2.fastq.gz"
+    - path: "output/trimmomatic/test.unpaired.trim_2.fastq.gz"
+
+- name: trimmomatic no adapter specified
+  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_no_adaptor -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
+  tags:
+    - "trimmomatic"
+  exit_code: 1