Trimmomatic (#1757)

* Trimmomatic main only first draft

* Add test files

* SE PE Adjustment

* Remove extra reads input

* chore: Remove TODOs

* Apply suggestions from code review

Co-authored-by: Edmund Miller <edmund.a.miller@gmail.com>

* fix(trimmomatic): Handle SE output correctly

Since there's never going to be unpaired reads for SE reads we can get
away with it for SE

* fix(trimmomatic): Use correct elvis operator to handle logic

* fix(trimmomatic): Add hack to work with SE and PE reads

* Update test.yml

* use the PE and SE trimming correctly

* Made user set adaptors

* Add documentation

* test(trimmomatic): Add files to pytest_modules

* test(trimmomatic): Update name of failing test

Co-authored-by: Edmund Miller <edmund.a.miller@protonmail.com>
Co-authored-by: Edmund Miller <edmund.a.miller@gmail.com>
This commit is contained in:
alyssa-ab 2022-06-17 08:18:15 -05:00 committed by GitHub
parent 8296a28346
commit c1eb9cce44
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 183 additions and 0 deletions

View file

@ -0,0 +1,46 @@
// Runs Trimmomatic on single-end or paired-end FastQ reads.
//
// Inputs:
//   meta  - Groovy map with sample information; `meta.id` and `meta.single_end` are read here
//   reads - FastQ file(s) passed to Trimmomatic as its input files
//
// Configuration (task.ext):
//   ext.args2  - trimming steps (e.g. ILLUMINACLIP/LEADING/...), placed right after the output files
//   ext.args   - extra text appended at the very end of the command line
//   ext.prefix - output file prefix, defaults to `meta.id`
process TRIMMOMATIC {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::trimmomatic=0.39" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trimmomatic:0.39--hdfd78af_2':
        'quay.io/biocontainers/trimmomatic:0.39--hdfd78af_2' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.paired.trim*.fastq.gz")                   , emit: trimmed_reads
    tuple val(meta), path("*.unpaired.trim_*.fastq.gz"), optional:true, emit: unpaired_reads
    tuple val(meta), path("*.log")                                     , emit: log
    path "versions.yml"                                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Trimmomatic mode keyword: SE for single-end, PE for paired-end
    def trimmed = meta.single_end ? "SE" : "PE"
    def output = meta.single_end ?
        "${prefix}.SE.paired.trim.fastq.gz" // HACK to avoid unpaired and paired in the trimmed_reads output
        : "${prefix}.paired.trim_1.fastq.gz ${prefix}.unpaired.trim_1.fastq.gz ${prefix}.paired.trim_2.fastq.gz ${prefix}.unpaired.trim_2.fastq.gz"
    def qual_trim = task.ext.args2 ?: ''
    // Fail fast with an actionable message instead of launching the tool with
    // nothing to do: when neither ext.args2 nor ext.args is configured, the
    // command line carries no trimming steps at all.
    if (!qual_trim && !args) {
        error "TRIMMOMATIC: no trimming steps were provided. Set 'ext.args2' in the module configuration, e.g. 'ILLUMINACLIP:<adapters.fa>:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'."
    }
    """
    trimmomatic \\
        $trimmed \\
        -threads $task.cpus \\
        -trimlog ${prefix}.log \\
        $reads \\
        $output \\
        $qual_trim \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trimmomatic: \$(trimmomatic -version)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,53 @@
# Module metadata for TRIMMOMATIC: describes the wrapped tool and the
# process's input and output channels.
name: "trimmomatic"
description: Performs quality and adapter trimming on paired end and single end reads
keywords:
  - trimming
  - adapter trimming
  - quality trimming
tools:
  - "trimmomatic":
      description: "A flexible read trimming tool for Illumina NGS data"
      homepage: "http://www.usadellab.org/cms/?page=trimmomatic"
      documentation: "https://github.com/usadellab/Trimmomatic"
      tool_dev_url: "https://github.com/usadellab/Trimmomatic"
      doi: "10.1093/bioinformatics/btu170"
      # A real YAML list rather than a stringified one
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        Input FastQ files of size 1 or 2 for single-end and paired-end data, respectively.
      pattern: "*.fastq.gz"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - trimmed_reads:
      type: file
      description: The trimmed/modified paired end fastq reads
      pattern: "*.paired.trim*.fastq.gz"
  - unpaired_reads:
      type: file
      description: The trimmed/modified unpaired end fastq reads
      pattern: "*.unpaired.trim_*.fastq.gz"
  - log:
      type: file
      description: trimmomatic log file
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@alyssa-ab"

View file

@ -2079,6 +2079,10 @@ trimgalore:
- modules/trimgalore/**
- tests/modules/trimgalore/**
# trimmomatic: path globs covering the module sources and the module tests.
# NOTE(review): presumably used to decide which module tests to run when
# files under these paths change - confirm against the CI setup.
trimmomatic:
- modules/trimmomatic/**
- tests/modules/trimmomatic/**
ucsc/bed12tobigbed:
- modules/ucsc/bed12tobigbed/**
- tests/modules/ucsc/bed12tobigbed/**

View file

@ -0,0 +1,42 @@
nextflow.enable.dsl = 2
include {
TRIMMOMATIC as TRIMMOMATIC_SE
TRIMMOMATIC as TRIMMOMATIC_PE
TRIMMOMATIC
} from '../../../modules/trimmomatic/main.nf'
//
// Single-end case: one FastQ file run through the TRIMMOMATIC_SE alias
//
workflow test_trimmomatic_single_end {

    // Sample metadata flagged as single-end
    meta  = [ id:'test', single_end:true ]

    // One read file taken from the shared test-data map
    reads = [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]

    TRIMMOMATIC_SE ( [ meta, reads ] )
}
//
// Paired-end case: two FastQ files run through the TRIMMOMATIC_PE alias
//
workflow test_trimmomatic_paired_end {

    // Sample metadata flagged as paired-end
    meta  = [ id:'test', single_end:false ]

    // Both mates, in R1/R2 order, taken from the shared test-data map
    reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    TRIMMOMATIC_PE ( [ meta, reads ] )
}
//
// Failure case: paired-end reads sent to the un-aliased TRIMMOMATIC include,
// i.e. the "no adaptor" scenario that is expected to fail
//
workflow test_trimmomatic_no_adaptor {

    // Sample metadata flagged as paired-end
    meta  = [ id:'test', single_end:false ]

    // Both mates, in R1/R2 order, taken from the shared test-data map
    reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]

    TRIMMOMATIC ( [ meta, reads ] )
}

View file

@ -0,0 +1,12 @@
// Process-scope settings for the trimmomatic module tests.
process {

    // Publish under <outdir>/<tool>: take the last ':'-separated part of the
    // process name, keep what precedes the first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // Trimming steps for the single-end alias.
    // NOTE(review): the adapter name here has no '.fa' suffix while the
    // paired-end one does - confirm whether that difference is intended.
    withName: 'TRIMMOMATIC_SE' {
        ext.args2 = 'ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
    }

    // Trimming steps for the paired-end alias.
    withName: 'TRIMMOMATIC_PE' {
        ext.args2 = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36'
    }
}

View file

@ -0,0 +1,26 @@
# Test cases for the trimmomatic module: each entry runs one workflow entry
# point and lists the files expected under output/trimmomatic.

# Single-end run: trimmed reads plus the log (log content pinned by md5sum)
- name: trimmomatic single-end
  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_single_end -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
  tags:
    - "trimmomatic"
  files:
    - path: "output/trimmomatic/test.SE.paired.trim.fastq.gz"
    - path: "output/trimmomatic/test.log"
      md5sum: e4c3f619e9b0e26847f8f3e3d9af319b

# Paired-end run: paired and unpaired outputs for both mates, plus the log
- name: trimmomatic paired-end
  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
  tags:
    - "trimmomatic"
  files:
    - path: "output/trimmomatic/test.log"
      md5sum: 9629761761a34576b3484bf4174f681f
    - path: "output/trimmomatic/test.paired.trim_1.fastq.gz"
    - path: "output/trimmomatic/test.unpaired.trim_1.fastq.gz"
    - path: "output/trimmomatic/test.paired.trim_2.fastq.gz"
    - path: "output/trimmomatic/test.unpaired.trim_2.fastq.gz"

# Negative case: the run is expected to exit with status 1
- name: trimmomatic no adapter specified
  command: nextflow run ./tests/modules/trimmomatic -entry test_trimmomatic_no_adaptor -c ./tests/config/nextflow.config -c ./tests/modules/trimmomatic/nextflow.config
  tags:
    - "trimmomatic"
  exit_code: 1