From aa76c6d870067e950f89f49f7735a25b4a257681 Mon Sep 17 00:00:00 2001
From: Maxime Borry
Date: Mon, 22 Mar 2021 18:16:04 +0100
Subject: [PATCH] new module: AdapterRemoval (#309)

* add adapterremoval module

* fix indentations

* switch to process_medium

* update docker tests

* remove duplicated entry

* fix line ending

* Update software/adapterremoval/main.nf

* Update software/adapterremoval/main.nf

Co-authored-by: Harshil Patel
---
Review notes (ignored by git am / git apply):
  * main.nf: paired-end branches passed reads[0] to both --file1 and --file2;
    --file2 now receives reads[1].
  * main.nf: removed the dangling line continuation after --gzip.
  * meta.yml: fixed the "fg.gz" typo in the input pattern.
  * test.yml: paired-end and collapse md5sums were generated with the wrong
    --file2 input and must be regenerated; they are commented out until then.

 software/adapterremoval/functions.nf   | 60 +++++++++++++++++++
 software/adapterremoval/main.nf        | 80 ++++++++++++++++++++++++++
 software/adapterremoval/meta.yml       | 50 ++++++++++++++++
 tests/config/pytest_software.yml       | 13 +++--
 tests/software/adapterremoval/main.nf  | 35 +++++++++++
 tests/software/adapterremoval/test.yml | 34 +++++++++++
 6 files changed, 267 insertions(+), 5 deletions(-)
 create mode 100644 software/adapterremoval/functions.nf
 create mode 100644 software/adapterremoval/main.nf
 create mode 100644 software/adapterremoval/meta.yml
 create mode 100644 tests/software/adapterremoval/main.nf
 create mode 100644 tests/software/adapterremoval/test.yml

diff --git a/software/adapterremoval/functions.nf b/software/adapterremoval/functions.nf
new file mode 100644
index 00000000..f177f0c8
--- /dev/null
+++ b/software/adapterremoval/functions.nf
@@ -0,0 +1,60 @@
+/*
+ * -----------------------------------------------------
+ *  Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args          = args.args ?: ''
+    options.args2         = args.args2 ?: ''
+    options.args3         = args.args3 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir   = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix        = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/software/adapterremoval/main.nf b/software/adapterremoval/main.nf
new file mode 100644
index 00000000..7432cae0
--- /dev/null
+++ b/software/adapterremoval/main.nf
@@ -0,0 +1,80 @@
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process ADAPTERREMOVAL {
+    tag "$meta.id"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0"
+    } else {
+        container "quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0"
+    }
+
+    input:
+    tuple val(meta), path(reads) // reads: one file (single-end) or [ mate 1, mate 2 ] (paired-end)
+
+    output:
+    tuple val(meta), path('*.fastq.gz'), emit: reads
+    tuple val(meta), path('*.log')     , emit: log
+    path "*.version.txt"               , emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+
+    if (meta.single_end) {
+        """
+        AdapterRemoval \\
+            --file1 $reads \\
+            $options.args \\
+            --basename $prefix \\
+            --threads $task.cpus \\
+            --settings ${prefix}.log \\
+            --output1 ${prefix}.trimmed.fastq.gz \\
+            --seed 42 \\
+            --gzip
+
+        AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g" > ${software}.version.txt
+        """
+    } else if (!meta.single_end && !meta.collapse) { // paired-end, mates kept separate
+        """
+        AdapterRemoval \\
+            --file1 ${reads[0]} \\
+            --file2 ${reads[1]} \\
+            $options.args \\
+            --basename $prefix \\
+            --threads $task.cpus \\
+            --settings ${prefix}.log \\
+            --output1 ${prefix}.pair1.trimmed.fastq.gz \\
+            --output2 ${prefix}.pair2.trimmed.fastq.gz \\
+            --seed 42 \\
+            --gzip
+
+        AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g" > ${software}.version.txt
+        """
+    } else { // paired-end with meta.collapse set: collapsed reads are concatenated into one merged file
+        """
+        AdapterRemoval \\
+            --file1 ${reads[0]} \\
+            --file2 ${reads[1]} \\
+            --collapse \\
+            $options.args \\
+            --basename $prefix \\
+            --threads $task.cpus \\
+            --settings ${prefix}.log \\
+            --seed 42 \\
+            --gzip
+
+        cat *.collapsed.gz *.collapsed.truncated.gz > ${prefix}.merged.fastq.gz
+        AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g" > ${software}.version.txt
+        """
+    }
+
+}
diff --git a/software/adapterremoval/meta.yml b/software/adapterremoval/meta.yml
new file mode 100644
index 00000000..15e0a201
--- /dev/null
+++ b/software/adapterremoval/meta.yml
@@ -0,0 +1,50 @@
+name: adapterremoval
+description: Trim sequencing adapters and collapse overlapping reads
+keywords:
+    - trimming
+    - adapters
+    - merging
+    - fastq
+tools:
+    - adapterremoval:
+        description: The AdapterRemoval v2 tool for merging and clipping reads.
+        homepage: https://github.com/MikkelSchubert/adapterremoval
+        documentation: https://adapterremoval.readthedocs.io
+        licence: ['GPL v3']
+
+input:
+    - meta:
+        type: map
+        description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false, collapse:false ]
+    - reads:
+        type: file
+        description: |
+            List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+            respectively.
+        pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+
+output:
+    - meta:
+        type: map
+        description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false, collapse:false ]
+    - reads:
+        type: file
+        description: |
+            List of adapter-trimmed FastQ files of size 1 for single-end or
+            collapsed data and of size 2 for paired-end data.
+        pattern: "*.{fastq.gz}"
+    - log:
+        type: file
+        description: AdapterRemoval log file
+        pattern: "*.log"
+    - version:
+        type: file
+        description: File containing software version
+        pattern: "*.{version.txt}"
+
+authors:
+    - "@maxibor"
diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml
index e790427b..630012ba 100644
--- a/tests/config/pytest_software.yml
+++ b/tests/config/pytest_software.yml
@@ -1,3 +1,7 @@
+adapterremoval:
+  - software/adapterremoval/**
+  - tests/software/adapterremoval/**
+
 bandage_image:
   - software/bandage/image/**
   - tests/software/bandage/image/**
@@ -76,11 +80,6 @@ bismark_methylationextractor:
   - software/bismark/genomepreparation/**
   - tests/software/bismark/methylationextractor/**
 
-
-blast_blastn:
-  - software/blast/blastn/**
-  - tests/software/blast/blastn/**
-
 bismark_report:
   - software/bismark/genomepreparation/**
   - software/bismark/align/**
@@ -97,6 +96,10 @@ bismark_summary:
   - software/bismark/summary/**
   - tests/software/bismark/summary/**
 
+blast_blastn:
+  - software/blast/blastn/**
+  - tests/software/blast/blastn/**
+
 blast_makeblastdb:
   - software/blast/makeblastdb/**
   - tests/software/blast/makeblastdb/**
diff --git a/tests/software/adapterremoval/main.nf b/tests/software/adapterremoval/main.nf
new file mode 100644
index 00000000..457287ce
--- /dev/null
+++ b/tests/software/adapterremoval/main.nf
@@ -0,0 +1,35 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ADAPTERREMOVAL } from '../../../software/adapterremoval/main.nf' addParams( options: [:] )
+
+workflow test_adapterremoval_single_end {
+
+    def input = []
+    input = [ [ id:'test', single_end:true, collapse:false ], // meta map
+              file("${launchDir}/tests/data/genomics/sarscov2/fastq/test_1.fastq.gz", checkIfExists: true) ]
+
+    ADAPTERREMOVAL ( input )
+}
+
+workflow test_adapterremoval_paired_end {
+
+    def input = []
+    input = [ [ id:'test', single_end:false, collapse:false ], // meta map
+              [ file("${launchDir}/tests/data/genomics/sarscov2/fastq/test_1.fastq.gz", checkIfExists: true),
+                file("${launchDir}/tests/data/genomics/sarscov2/fastq/test_2.fastq.gz", checkIfExists: true) ]]
+
+    ADAPTERREMOVAL ( input )
+}
+
+workflow test_adapterremoval_paired_end_collapse {
+
+    def input = []
+    input = [ [ id:'test', single_end:false, collapse:true ], // meta map
+              [ file("${launchDir}/tests/data/genomics/sarscov2/fastq/test_1.fastq.gz", checkIfExists: true),
+                file("${launchDir}/tests/data/genomics/sarscov2/fastq/test_2.fastq.gz", checkIfExists: true) ]]
+
+    ADAPTERREMOVAL ( input )
+}
+
diff --git a/tests/software/adapterremoval/test.yml b/tests/software/adapterremoval/test.yml
new file mode 100644
index 00000000..7666bc6d
--- /dev/null
+++ b/tests/software/adapterremoval/test.yml
@@ -0,0 +1,34 @@
+- name: adapterremoval test_adapterremoval_single_end
+  command: nextflow run tests/software/adapterremoval -entry test_adapterremoval_single_end -c tests/config/nextflow.config
+  tags:
+    - adapterremoval_single_end
+    - adapterremoval
+  files:
+    - path: output/adapterremoval/test.log
+      md5sum: 2fd3d5d703b63ba33a83021fccf25f77
+    - path: output/adapterremoval/test.trimmed.fastq.gz
+      md5sum: 62139afee94defad5b83bdd0b8475a1f
+
+- name: adapterremoval test_adapterremoval_paired_end
+  command: nextflow run tests/software/adapterremoval -entry test_adapterremoval_paired_end -c tests/config/nextflow.config
+  tags:
+    - adapterremoval_paired_end
+    - adapterremoval
+  files:
+    - path: output/adapterremoval/test.pair2.trimmed.fastq.gz
+      # md5sum: TODO regenerate - old checksum was produced with mate 1 passed as --file2
+    - path: output/adapterremoval/test.log
+      # md5sum: TODO regenerate - old checksum was produced with mate 1 passed as --file2
+    - path: output/adapterremoval/test.pair1.trimmed.fastq.gz
+      # md5sum: TODO regenerate - old checksum was produced with mate 1 passed as --file2
+
+- name: adapterremoval test_adapterremoval_paired_end_collapse
+  command: nextflow run tests/software/adapterremoval -entry test_adapterremoval_paired_end_collapse -c tests/config/nextflow.config
+  tags:
+    - adapterremoval_paired_end_collapse
+    - adapterremoval
+  files:
+    - path: output/adapterremoval/test.log
+      # md5sum: TODO regenerate - old checksum was produced with mate 1 passed as --file2
+    - path: output/adapterremoval/test.merged.fastq.gz
+      # md5sum: TODO regenerate - old checksum was produced with mate 1 passed as --file2