From f1c5384c31e985591716afdd732cf8c2ae29d05b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sun, 3 Apr 2022 16:06:22 +0200 Subject: [PATCH 01/15] Add PRINSEQPLUSPLUS (#1481) * fix: remove left-over unnecessary code * Add prinseq++ * Remove last todo * Fix tests due to variability of output FASTQs (reads can be ordered differently between runs) * Apply suggestions from code review --- modules/prinseqplusplus/main.nf | 61 +++++++++++++++++++ modules/prinseqplusplus/meta.yml | 60 ++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/prinseqplusplus/main.nf | 24 ++++++++ tests/modules/prinseqplusplus/nextflow.config | 9 +++ tests/modules/prinseqplusplus/test.yml | 27 ++++++++ 6 files changed, 185 insertions(+) create mode 100644 modules/prinseqplusplus/main.nf create mode 100644 modules/prinseqplusplus/meta.yml create mode 100644 tests/modules/prinseqplusplus/main.nf create mode 100644 tests/modules/prinseqplusplus/nextflow.config create mode 100644 tests/modules/prinseqplusplus/test.yml diff --git a/modules/prinseqplusplus/main.nf b/modules/prinseqplusplus/main.nf new file mode 100644 index 00000000..ebd8c58c --- /dev/null +++ b/modules/prinseqplusplus/main.nf @@ -0,0 +1,61 @@ +process PRINSEQPLUSPLUS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1': + 'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*_good_out*.fastq.gz") , emit: good_reads + tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads + tuple val(meta), path("*_bad_out*.fastq.gz") , optional: true, emit: bad_reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (meta.single_end) { + """ + prinseq++ \\ + -threads $task.cpus \\ + -fastq ${reads} \\ + -out_name ${prefix} \\ + -out_gz \\ + -VERBOSE 1 \\ + $args \\ + | tee ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' )) + END_VERSIONS + """ + } else { + """ + prinseq++ \\ + -threads $task.cpus \\ + -fastq ${reads[0]} \\ + -fastq2 ${reads[1]} \\ + -out_name ${prefix} \\ + -out_gz \\ + -VERBOSE 1 \\ + $args \\ + | tee ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' )) + END_VERSIONS + """ + } +} diff --git a/modules/prinseqplusplus/meta.yml b/modules/prinseqplusplus/meta.yml new file mode 100644 index 00000000..8155df93 --- /dev/null +++ b/modules/prinseqplusplus/meta.yml @@ -0,0 +1,60 @@ +name: "prinseqplusplus" +description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. 
It can be used to filter, reformat or trim genomic and metagenomic sequence data +keywords: + - fastq + - fasta + - filter + - trim +tools: + - "prinseqplusplus": + description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning" + homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus" + documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus" + tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus" + doi: "10.7287/peerj.preprints.27553v1" + licence: "['GPL v2']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end + data, respectively. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - good_reads: + type: file + description: Reads passing filter(s) in gzipped FASTQ format + pattern: "*_good_out_{R1,R2}.fastq.gz" + - single_reads: + type: file + description: | + Single reads without the pair passing filter(s) in gzipped FASTQ format + pattern: "*_single_out_{R1,R2}.fastq.gz" + - bad_reads: + type: file + description: | + Reads without not passing filter(s) in gzipped FASTQ format + pattern: "*_bad_out_{R1,R2}.fastq.gz" + - log: + type: file + description: | + Verbose level 2 STDOUT information in a log file + pattern: "*.log" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 24bfe641..6d66f230 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1407,6 +1407,10 @@ preseq/lcextrap: - modules/preseq/lcextrap/** - tests/modules/preseq/lcextrap/** +prinseqplusplus: + - modules/prinseqplusplus/** + - tests/modules/prinseqplusplus/** + prodigal: - modules/prodigal/** - tests/modules/prodigal/** diff --git a/tests/modules/prinseqplusplus/main.nf b/tests/modules/prinseqplusplus/main.nf new file mode 100644 index 00000000..d6ee3be9 --- /dev/null +++ b/tests/modules/prinseqplusplus/main.nf @@ -0,0 +1,24 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PRINSEQPLUSPLUS } from '../../../modules/prinseqplusplus/main.nf' + +workflow test_prinseqplusplus_single_end { + + input = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + PRINSEQPLUSPLUS ( input ) +} + +workflow test_prinseqplusplus_paired_end { + + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + PRINSEQPLUSPLUS ( input ) +} diff --git a/tests/modules/prinseqplusplus/nextflow.config b/tests/modules/prinseqplusplus/nextflow.config new file mode 100644 index 00000000..032e5713 --- /dev/null +++ b/tests/modules/prinseqplusplus/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: PRINSEQPLUSPLUS { + ext.args = "-lc_entropy=0.8" + } + +} diff --git a/tests/modules/prinseqplusplus/test.yml b/tests/modules/prinseqplusplus/test.yml new file mode 100644 index 00000000..e034febb --- /dev/null +++ b/tests/modules/prinseqplusplus/test.yml @@ -0,0 +1,27 
@@ +- name: prinseqplusplus test_prinseqplusplus_single_end + command: nextflow run tests/modules/prinseqplusplus -entry test_prinseqplusplus_single_end -c tests/config/nextflow.config + tags: + - prinseqplusplus + files: + - path: output/prinseqplusplus/test.log + contains: + - "reads removed by -lc_entropy" + - path: output/prinseqplusplus/test_bad_out.fastq.gz + - path: output/prinseqplusplus/test_good_out.fastq.gz + - path: output/prinseqplusplus/versions.yml + +- name: prinseqplusplus test_prinseqplusplus_paired_end + command: nextflow run tests/modules/prinseqplusplus -entry test_prinseqplusplus_paired_end -c tests/config/nextflow.config + tags: + - prinseqplusplus + files: + - path: output/prinseqplusplus/test.log + contains: + - "reads removed by -lc_entropy" + - path: output/prinseqplusplus/test_bad_out_R1.fastq.gz + - path: output/prinseqplusplus/test_bad_out_R2.fastq.gz + - path: output/prinseqplusplus/test_good_out_R1.fastq.gz + - path: output/prinseqplusplus/test_good_out_R2.fastq.gz + - path: output/prinseqplusplus/test_single_out_R1.fastq.gz + - path: output/prinseqplusplus/test_single_out_R2.fastq.gz + - path: output/prinseqplusplus/versions.yml From 6a11c5a2226436f5543d582b81835819d0767637 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Mon, 4 Apr 2022 10:18:11 +0200 Subject: [PATCH 02/15] Fix untar for centrifuge (#1472) --- modules/centrifuge/main.nf | 4 ++-- modules/centrifuge/meta.yml | 3 +++ tests/modules/centrifuge/main.nf | 13 +++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/modules/centrifuge/main.nf b/modules/centrifuge/main.nf index 7eb566da..c9ec377b 100644 --- a/modules/centrifuge/main.nf +++ b/modules/centrifuge/main.nf @@ -10,6 +10,7 @@ process CENTRIFUGE { input: tuple val(meta), path(reads) path db + val db_name val save_unaligned val save_aligned val sam_format @@ -42,9 +43,8 @@ process CENTRIFUGE { } def sam_output = sam_format ? 
"--out-fmt 'sam'" : '' """ - tar -xf $db centrifuge \\ - -x $db_name \\ + -x ${db}/${db_name} \\ -p $task.cpus \\ $paired \\ --report-file ${prefix}.report.txt \\ diff --git a/modules/centrifuge/meta.yml b/modules/centrifuge/meta.yml index 3adf0e23..aabb465f 100644 --- a/modules/centrifuge/meta.yml +++ b/modules/centrifuge/meta.yml @@ -27,6 +27,9 @@ input: type: directory description: Centrifuge database in .tar.gz format pattern: "*.tar.gz" + - db_name: + type: string + description: Centrifuge database filenames without the suffix ".cf" - save_unaligned: type: value description: If true unmapped fastq files are saved diff --git a/tests/modules/centrifuge/main.nf b/tests/modules/centrifuge/main.nf index a8eb2fcb..37393ce5 100644 --- a/tests/modules/centrifuge/main.nf +++ b/tests/modules/centrifuge/main.nf @@ -2,18 +2,21 @@ nextflow.enable.dsl = 2 +include { UNTAR } from '../../../modules/untar/main.nf' include { CENTRIFUGE } from '../../../modules/centrifuge/main.nf' workflow test_centrifuge_single_end { input = [ [ id:'test', single_end:true ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] ] - db = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true) + db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + db_name = "minigut_cf" save_unaligned = true save_aligned = false sam_format = false - CENTRIFUGE ( input, db, save_unaligned, save_aligned, sam_format ) + UNTAR ( db ) + CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] },db_name, save_unaligned, save_aligned, sam_format ) } @@ -22,12 +25,14 @@ workflow test_centrifuge_paired_end { [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] ] - db = file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz", checkIfExists: true) + db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + db_name = "minigut_cf" save_unaligned = true save_aligned = false sam_format = false - CENTRIFUGE ( input, db, save_unaligned, save_aligned, sam_format ) + UNTAR ( db ) + CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] }, db_name, save_unaligned, save_aligned, sam_format ) } From cb54d1ebd77de5b482cae89ed9e51fa6ef97d3ee Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 4 Apr 2022 13:18:02 +0200 Subject: [PATCH 03/15] update stubs (#1488) --- modules/cnvpytor/histogram/main.nf | 2 +- modules/cnvpytor/partition/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cnvpytor/histogram/main.nf b/modules/cnvpytor/histogram/main.nf index d1c6856c..9e59c6b8 100644 --- a/modules/cnvpytor/histogram/main.nf +++ b/modules/cnvpytor/histogram/main.nf @@ -32,7 +32,7 @@ process CNVPYTOR_HISTOGRAM { stub: """ - touch test.pytor + touch ${pytor.baseName}.pytor cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/cnvpytor/partition/main.nf b/modules/cnvpytor/partition/main.nf index 975458bf..0311bdfc 100644 --- a/modules/cnvpytor/partition/main.nf +++ b/modules/cnvpytor/partition/main.nf @@ -32,7 +32,7 @@ process CNVPYTOR_PARTITION { stub: """ - touch test.pytor + touch ${pytor.baseName}.pytor cat 
<<-END_VERSIONS > versions.yml "${task.process}": From ae48653bd2d169510580220bb62d96f830c31293 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 4 Apr 2022 13:31:36 +0200 Subject: [PATCH 04/15] DASTool output channels update (#1489) * fix: remove left-over unnecessary code * Make summary output optional as not generated if no sufficiently HQ bins found * Make contig2bin optional as only generated if sufficient HQ bins found --- modules/dastool/dastool/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dastool/dastool/main.nf b/modules/dastool/dastool/main.nf index 968f85de..a7d9c6f6 100644 --- a/modules/dastool/dastool/main.nf +++ b/modules/dastool/dastool/main.nf @@ -14,8 +14,8 @@ process DASTOOL_DASTOOL { output: tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*_summary.tsv") , emit: summary - tuple val(meta), path("*_DASTool_contig2bin.tsv") , emit: contig2bin + tuple val(meta), path("*_summary.tsv") , optional: true, emit: summary + tuple val(meta), path("*_DASTool_contig2bin.tsv") , optional: true, emit: contig2bin tuple val(meta), path("*.eval") , optional: true, emit: eval tuple val(meta), path("*_DASTool_bins/*.fa") , optional: true, emit: bins tuple val(meta), path("*.pdf") , optional: true, emit: pdfs From 13cc32399cdaa866092b1bbc6e8a982d51c455db Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 4 Apr 2022 13:21:37 -0500 Subject: [PATCH 05/15] feat(homer): Add groseq subworkflow (#1492) * feat(homer): Add groseq subworkflow * fix(homer): Update groseq paths * test(homer): Update groseq bam md5sums * test(homer): Update bed process args wildcard * test(homer): Update groseq bed md5s * style: Run prettier * style(homer): Align comments Co-authored-by: Friederike Hanssen * docs(homer): Add groseq meta.yml Co-authored-by: Friederike Hanssen --- subworkflows/nf-core/homer/groseq/main.nf | 50 +++++++++++++++++++ subworkflows/nf-core/homer/groseq/meta.yml | 48 ++++++++++++++++++ .../subworkflows/nf-core/homer/groseq/main.nf | 24 +++++++++ .../nf-core/homer/groseq/nextflow.config | 9 ++++ .../nf-core/homer/groseq/test.yml | 27 ++++++++++ 5 files changed, 158 insertions(+) create mode 100644 subworkflows/nf-core/homer/groseq/main.nf create mode 100644 subworkflows/nf-core/homer/groseq/meta.yml create mode 100644 tests/subworkflows/nf-core/homer/groseq/main.nf create mode 100644 tests/subworkflows/nf-core/homer/groseq/nextflow.config create mode 100644 tests/subworkflows/nf-core/homer/groseq/test.yml diff --git a/subworkflows/nf-core/homer/groseq/main.nf b/subworkflows/nf-core/homer/groseq/main.nf new file mode 100644 index 00000000..b83c7e21 --- /dev/null +++ b/subworkflows/nf-core/homer/groseq/main.nf @@ -0,0 +1,50 @@ +/* + * Identify transcripts with homer + */ + +include { HOMER_MAKETAGDIRECTORY } from '../../../../modules/homer/maketagdirectory/main' +include { HOMER_MAKEUCSCFILE } from '../../../../modules/homer/makeucscfile/main' +include { HOMER_FINDPEAKS } from '../../../../modules/homer/findpeaks/main' +include { HOMER_POS2BED } from '../../../../modules/homer/pos2bed/main' + +workflow HOMER_GROSEQ { + take: + bam // channel: [ val(meta), [ reads ] ] + fasta // file: /path/to/bwa/index/ + + main: + + ch_versions = Channel.empty() + + /* + * Create a Tag Directory From The GRO-Seq experiment + */ + HOMER_MAKETAGDIRECTORY ( bam, fasta ) + ch_versions = ch_versions.mix(HOMER_MAKETAGDIRECTORY.out.versions.first()) + + /* + * Creating UCSC Visualization Files + */ + HOMER_MAKEUCSCFILE ( 
HOMER_MAKETAGDIRECTORY.out.tagdir ) + ch_versions = ch_versions.mix(HOMER_MAKEUCSCFILE.out.versions.first()) + + /* + * Find transcripts directly from GRO-Seq + */ + HOMER_FINDPEAKS ( HOMER_MAKETAGDIRECTORY.out.tagdir ) + ch_versions = ch_versions.mix(HOMER_FINDPEAKS.out.versions.first()) + + /* + * Convert peak file to bed file + */ + HOMER_POS2BED ( HOMER_FINDPEAKS.out.txt ) + ch_versions = ch_versions.mix(HOMER_POS2BED.out.versions.first()) + + emit: + tagdir = HOMER_MAKETAGDIRECTORY.out.tagdir // channel: [ val(meta), [ tagdir ] ] + bed_graph = HOMER_MAKEUCSCFILE.out.bedGraph // channel: [ val(meta), [ tag_dir/*ucsc.bedGraph.gz ] ] + peaks = HOMER_FINDPEAKS.out.txt // channel: [ val(meta), [ *peaks.txt ] ] + bed = HOMER_POS2BED.out.bed // channel: [ val(meta), [ *peaks.txt ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/homer/groseq/meta.yml b/subworkflows/nf-core/homer/groseq/meta.yml new file mode 100644 index 00000000..4bd36a88 --- /dev/null +++ b/subworkflows/nf-core/homer/groseq/meta.yml @@ -0,0 +1,48 @@ +name: homer_groseq +description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals. +keywords: + - homer + - groseq + - nascent +modules: + - homer/maketagdirectory + - homer/makeucscfile + - homer/findpeaks + - homer/pos2bed +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: list + description: list of BAM files, also able to take SAM and BED as input + pattern: "[ *.{bam/sam/bed} ]" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" +output: + - tagdir: + type: directory + description: The "Tag Directory" + pattern: "*_tagdir" + - bedGraph: + type: file + description: The UCSC bed graph + pattern: "*.bedGraph.gz" + - peaks: + type: file + description: The found peaks + pattern: "*.peaks.txt" + - bed: + type: file + description: A BED file of the found peaks + pattern: "*.bed" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/tests/subworkflows/nf-core/homer/groseq/main.nf b/tests/subworkflows/nf-core/homer/groseq/main.nf new file mode 100644 index 00000000..72b95e87 --- /dev/null +++ b/tests/subworkflows/nf-core/homer/groseq/main.nf @@ -0,0 +1,24 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { HOMER_GROSEQ as HOMER_GROSEQ_BAM + HOMER_GROSEQ as HOMER_GROSEQ_BED } from '../../../../../subworkflows/nf-core/homer/groseq/main' + +workflow test_homer_groseq_bam { + def input = [] + input = [[ id: 'test' ], + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)]] + def fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + HOMER_GROSEQ_BAM ( input, fasta ) +} + +workflow test_homer_groseq_bed { + def input = [] + input = [[ id: 'test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)]] + def fasta = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + HOMER_GROSEQ_BED ( input, fasta ) +} diff --git a/tests/subworkflows/nf-core/homer/groseq/nextflow.config b/tests/subworkflows/nf-core/homer/groseq/nextflow.config new file mode 100644 index 00000000..09a44497 --- /dev/null +++ 
b/tests/subworkflows/nf-core/homer/groseq/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: '.*:HOMER_GROSEQ_BED:HOMER_MAKETAGDIRECTORY' { + ext.args = "-checkGC -format bed" + } + +} diff --git a/tests/subworkflows/nf-core/homer/groseq/test.yml b/tests/subworkflows/nf-core/homer/groseq/test.yml new file mode 100644 index 00000000..1eaeb3f5 --- /dev/null +++ b/tests/subworkflows/nf-core/homer/groseq/test.yml @@ -0,0 +1,27 @@ +- name: subworkflow homer_groseq bam + command: nextflow run ./tests/subworkflows/nf-core/homer/groseq/ -entry test_homer_groseq_bam -c tests/config/nextflow.config -c tests/subworkflows/nf-core/homer/groseq/nextflow.config + tags: + - homer + files: + - path: output/homer/test.bed + md5sum: 8d40034dfe22c5cf973071aa1e8d3617 + - path: output/homer/test.bedGraph.gz + md5sum: de2b2f8ab90a909b8bfbe755bdaba407 + - path: output/homer/test.peaks.txt + md5sum: 8d40034dfe22c5cf973071aa1e8d3617 + - path: output/homer/versions.yml + md5sum: c85dee03f1afabe406a87743a4c5506d + +- name: subworkflow homer_groseq bed + command: nextflow run ./tests/subworkflows/nf-core/homer/groseq/ -entry test_homer_groseq_bed -c tests/config/nextflow.config -c tests/subworkflows/nf-core/homer/groseq/nextflow.config + tags: + - homer + files: + - path: output/homer/test.bed + md5sum: 25e8b64946012d1c4567a04062e90fae + - path: output/homer/test.bedGraph.gz + md5sum: 2d2d1c2d3242ff74c7a922695accb9d2 + - path: output/homer/test.peaks.txt + md5sum: 25e8b64946012d1c4567a04062e90fae + - path: output/homer/versions.yml + md5sum: c9b5f1248d28c216b000cba8da738455 From 879d42c5e28661fe0a5e744c9e2c515868f9e08a Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Mon, 4 Apr 2022 21:40:35 +0200 Subject: [PATCH 06/15] Refactor adapterremoval (#1491) * refactor: insert .fastq file extensions * style: insert whitespace * refactor: create paired output * refactor: rename settings from log Requested by @jfy133 * tests: correct expected output * fix: remove settings option due to default * chore: rename output patterns * refactor: omit paired files in single-end * refactor: rename output to settings --- modules/adapterremoval/main.nf | 44 ++++++++++++++++++++------- modules/adapterremoval/meta.yml | 14 ++++----- tests/modules/adapterremoval/test.yml | 26 ++++++++-------- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/modules/adapterremoval/main.nf b/modules/adapterremoval/main.nf index 9d16b9c9..0e17c055 100644 --- a/modules/adapterremoval/main.nf +++ b/modules/adapterremoval/main.nf @@ -12,15 +12,14 @@ process ADAPTERREMOVAL { path(adapterlist) output: - tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated - tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded - tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated - tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated - tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed - tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated - tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated + tuple val(meta), 
path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded + tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated + tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed + tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated + tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved + tuple val(meta), path('*.settings') , emit: settings + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +37,19 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads ${task.cpus} \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") @@ -56,10 +64,24 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads $task.cpus \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + ensure_fastq '${prefix}.pair1.truncated.gz' + ensure_fastq '${prefix}.pair2.truncated.gz' + ensure_fastq '${prefix}.collapsed.gz' + ensure_fastq '${prefix}.collapsed.truncated.gz' + ensure_fastq '${prefix}.paired.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") diff --git a/modules/adapterremoval/meta.yml b/modules/adapterremoval/meta.yml index 5faad043..77273f60 100644 --- a/modules/adapterremoval/meta.yml +++ b/modules/adapterremoval/meta.yml @@ -43,43 +43,43 @@ output: Adapter trimmed FastQ files of either single-end reads, or singleton 'orphaned' reads from merging of paired-end data (i.e., one of the pair was lost due to filtering thresholds). - pattern: "*.truncated.gz" + pattern: "*.truncated.fastq.gz" - discarded: type: file description: | Adapter trimmed FastQ files of reads that did not pass filtering thresholds. - pattern: "*.discarded.gz" + pattern: "*.discarded.fastq.gz" - pair1_truncated: type: file description: | Adapter trimmed R1 FastQ files of paired-end reads that did not merge with their respective R2 pair due to long templates. The respective pair is stored in 'pair2_truncated'. - pattern: "*.pair1.truncated.gz" + pattern: "*.pair1.truncated.fastq.gz" - pair2_truncated: type: file description: | Adapter trimmed R2 FastQ files of paired-end reads that did not merge with their respective R1 pair due to long templates. The respective pair is stored in 'pair1_truncated'. - pattern: "*.pair2.truncated.gz" + pattern: "*.pair2.truncated.fastq.gz" - collapsed: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair but were not trimmed. - pattern: "*.collapsed.gz" + pattern: "*.collapsed.fastq.gz" - collapsed_truncated: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair and were trimmed of adapter due to sufficient overlap. 
- pattern: "*.collapsed.truncated.gz" + pattern: "*.collapsed.truncated.fastq.gz" - log: type: file description: AdapterRemoval log file - pattern: "*.log" + pattern: "*.settings" - versions: type: file description: File containing software versions diff --git a/tests/modules/adapterremoval/test.yml b/tests/modules/adapterremoval/test.yml index f6adfba3..e660da76 100644 --- a/tests/modules/adapterremoval/test.yml +++ b/tests/modules/adapterremoval/test.yml @@ -3,10 +3,10 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.discarded.gz - - path: output/adapterremoval/test.log + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: 2fd3d5d703b63ba33a83021fccf25f77 - - path: output/adapterremoval/test.truncated.gz + - path: output/adapterremoval/test.truncated.fastq.gz md5sum: 62139afee94defad5b83bdd0b8475a1f - path: output/adapterremoval/versions.yml md5sum: ac5b46719719b7ee62739530b80869fc @@ -16,12 +16,12 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.discarded.gz - - path: output/adapterremoval/test.log + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: b8a451d3981b327f3fdb44f40ba2d6d1 - - path: output/adapterremoval/test.pair1.truncated.gz + - path: output/adapterremoval/test.pair1.truncated.fastq.gz md5sum: 294a6277f0139bd597e57c6fa31f39c7 - - path: output/adapterremoval/test.pair2.truncated.gz + - path: output/adapterremoval/test.pair2.truncated.fastq.gz md5sum: de7b38e2c881bced8671acb1ab452d78 - path: output/adapterremoval/versions.yml md5sum: fa621c887897da5a379c719399c17db7 @@ -31,15 +31,15 @@ tags: - adapterremoval files: - - path: output/adapterremoval/test.collapsed.gz + - path: output/adapterremoval/test.collapsed.fastq.gz md5sum: ff956de3532599a56c3efe5369f0953f - - path: output/adapterremoval/test.collapsed.truncated.gz - - path: output/adapterremoval/test.discarded.gz - - path: output/adapterremoval/test.log + - path: output/adapterremoval/test.collapsed.truncated.fastq.gz + - path: output/adapterremoval/test.discarded.fastq.gz + - path: output/adapterremoval/test.settings md5sum: 7f0b2328152226e46101a535cce718b3 - - path: output/adapterremoval/test.pair1.truncated.gz + - path: output/adapterremoval/test.pair1.truncated.fastq.gz md5sum: 683be19bc1c83008944b6b719bfa34e1 - - path: output/adapterremoval/test.pair2.truncated.gz + - path: output/adapterremoval/test.pair2.truncated.fastq.gz md5sum: e6548fe061f3ef86368b26da930174d0 - path: output/adapterremoval/versions.yml md5sum: 78f589bb313c8da0147ca8ce77d7f3bf From 797ce3254e1868b224ec5c2742418876af254c35 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 5 Apr 2022 11:06:46 +0200 Subject: [PATCH 07/15] Update: biobambam/bammarkduplicates2 to v2.0.183 (#1493) * bump version, remove md5sums from test * re-add md5sums --- modules/biobambam/bammarkduplicates2/main.nf | 6 ++---- tests/modules/biobambam/bammarkduplicates2/test.yml | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/biobambam/bammarkduplicates2/main.nf b/modules/biobambam/bammarkduplicates2/main.nf index a93e55b5..dd0e55b6 100644 --- a/modules/biobambam/bammarkduplicates2/main.nf +++ b/modules/biobambam/bammarkduplicates2/main.nf @@ -2,10 +2,8 @@ process BIOBAMBAM_BAMMARKDUPLICATES2 { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::biobambam=2.0.182" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/biobambam:2.0.182--h7d875b9_0': - 'quay.io/biocontainers/biobambam:2.0.182--h7d875b9_0' }" + conda (params.enable_conda ? "bioconda::biobambam=2.0.183" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/biobambam:2.0.183--h9f5acd7_1' : 'quay.io/biocontainers/biobambam:2.0.183--h9f5acd7_1'}" input: tuple val(meta), path(bam) diff --git a/tests/modules/biobambam/bammarkduplicates2/test.yml b/tests/modules/biobambam/bammarkduplicates2/test.yml index d046dfe9..7c16fcf1 100644 --- a/tests/modules/biobambam/bammarkduplicates2/test.yml +++ b/tests/modules/biobambam/bammarkduplicates2/test.yml @@ -5,8 +5,8 @@ - biobambam files: - path: output/biobambam/test.bam - md5sum: 1cf7f957eb20b4ace9f10d0cf0a0649a + md5sum: 603edff09029096ddf2bb8a3f12d7aa7 - path: output/biobambam/test.metrics.txt md5sum: 30d6e7d90bb5df46329d4bc0144ce927 - path: output/biobambam/versions.yml - md5sum: 0d6f3137ed4515333d73c779f2c24445 + md5sum: dfdf2b084655d124acac0bfb4eda86cc From dc95e67e153ad937b869d90229333c0654628912 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 6 Apr 2022 08:18:23 +0200 Subject: [PATCH 08/15] New tool: biobambam/bamsormadup (#1478) * add bamsormadup * fix yaml * add test.yml * Update tests/modules/biobambam/bamsormadup/test.yml Co-authored-by: James A. Fellows Yates * test meta.yaml: remove md5sums * Tool bamsormadup: - add (optional) reference input - add bam index ouput - add cram output option - make metrics output: more general * fix input and output formats * update input file description * drop sam output, goes against nf-core regs; add input check for cram files * fix typo * Update modules/biobambam/bamsormadup/main.nf Co-authored-by: James A. Fellows Yates * improve ref fasta name * fix if else shorthand * fix syntax error * kind of fix tests * set fixed suffix for metrics file to keep it in line with picard and bammarkduplicates2 * fix command line * update test.yml * add support for multiple input bams * Update modules/biobambam/bamsormadup/meta.yml Co-authored-by: James A. Fellows Yates * Update modules/biobambam/bamsormadup/meta.yml Co-authored-by: James A. Fellows Yates * Update tests/modules/biobambam/bamsormadup/test.yml Co-authored-by: James A. Fellows Yates Co-authored-by: James A. 
Fellows Yates --- modules/biobambam/bamsormadup/main.nf | 46 ++++++++++++++++ modules/biobambam/bamsormadup/meta.yml | 52 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/biobambam/bamsormadup/main.nf | 15 ++++++ .../biobambam/bamsormadup/nextflow.config | 5 ++ tests/modules/biobambam/bamsormadup/test.yml | 11 ++++ 6 files changed, 133 insertions(+) create mode 100644 modules/biobambam/bamsormadup/main.nf create mode 100644 modules/biobambam/bamsormadup/meta.yml create mode 100644 tests/modules/biobambam/bamsormadup/main.nf create mode 100644 tests/modules/biobambam/bamsormadup/nextflow.config create mode 100644 tests/modules/biobambam/bamsormadup/test.yml diff --git a/modules/biobambam/bamsormadup/main.nf b/modules/biobambam/bamsormadup/main.nf new file mode 100644 index 00000000..b9e28e43 --- /dev/null +++ b/modules/biobambam/bamsormadup/main.nf @@ -0,0 +1,46 @@ +process BIOBAMBAM_BAMSORMADUP { + tag "$meta.id" + label "process_medium" + + conda (params.enable_conda ? "bioconda::biobambam=2.0.183" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/biobambam:2.0.183--h9f5acd7_1' : 'quay.io/biocontainers/biobambam:2.0.183--h9f5acd7_1'}" + + input: + tuple val(meta), path(bams) + path(fasta) + + output: + tuple val(meta), path("*.{bam,cram}") ,emit: bam + tuple val(meta), path("*.bam.bai") ,optional:true, emit: bam_index + tuple val(meta), path("*.metrics.txt") ,emit: metrics + path "versions.yml" ,emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("outputformat=cram") ? "cram" : "bam" + def input_string = bams.join(" I=") + + if (args.contains("outputformat=cram") && reference == null) error "Reference required for CRAM output." + + """ + bamcat \\ + I=${input_string} \\ + level=0 \\ + | bamsormadup \\ + $args \\ + M=${prefix}.metrics.txt \\ + tmpfile=$prefix \\ + threads=$task.cpus \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bamcat: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' ) + bamsormadup: \$(echo \$(bamsormadup --version 2>&1) | sed 's/^This is biobambam2 version //; s/..biobambam2 is .*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/biobambam/bamsormadup/meta.yml b/modules/biobambam/bamsormadup/meta.yml new file mode 100644 index 00000000..39acf3b3 --- /dev/null +++ b/modules/biobambam/bamsormadup/meta.yml @@ -0,0 +1,52 @@ +name: biobambam_bamsormadup +description: Parallel sorting and duplicate marking +keywords: + - markduplicates + - sort + - bam + - cram +tools: + - biobambam: + description: | + biobambam is a set of tools for early stage alignment file processing. + homepage: https://gitlab.com/german.tischler/biobambam2 + documentation: https://gitlab.com/german.tischler/biobambam2/-/blob/master/README.md + doi: 10.1186/1751-0473-9-13 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: List containing 1 or more bam files + - fasta: + type: file + description: Reference genome in FASTA format (optional) + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM file with duplicate reads marked/removed + pattern: "*.{bam,cram}" + - bam_index: + type: file + description: BAM index file + pattern: "*.{bai}" + - metrics: + type: file + description: Duplicate metrics file generated by biobambam + pattern: "*.{metrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 6d66f230..d47b95c4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -214,6 +214,10 @@ biobambam/bammarkduplicates2: - modules/biobambam/bammarkduplicates2/** - tests/modules/biobambam/bammarkduplicates2/** +biobambam/bamsormadup: + - modules/biobambam/bamsormadup/** + - tests/modules/biobambam/bamsormadup/** + biscuit/align: - modules/biscuit/index/** - modules/biscuit/align/** diff --git a/tests/modules/biobambam/bamsormadup/main.nf b/tests/modules/biobambam/bamsormadup/main.nf new file mode 100644 index 00000000..741a4433 --- /dev/null +++ b/tests/modules/biobambam/bamsormadup/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BIOBAMBAM_BAMSORMADUP } from '../../../../modules/biobambam/bamsormadup/main.nf' + +workflow test_biobambam_bamsormadup { + + input = [ + [ id:'test', single_end:false ], // meta map + [file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)], + ] + + BIOBAMBAM_BAMSORMADUP ( input, [] ) +} diff --git a/tests/modules/biobambam/bamsormadup/nextflow.config b/tests/modules/biobambam/bamsormadup/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/biobambam/bamsormadup/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/biobambam/bamsormadup/test.yml b/tests/modules/biobambam/bamsormadup/test.yml new file mode 100644 index 00000000..a7a14202 --- /dev/null +++ b/tests/modules/biobambam/bamsormadup/test.yml @@ -0,0 +1,11 @@ +- name: biobambam bamsormadup test_biobambam_bamsormadup + command: nextflow run tests/modules/biobambam/bamsormadup -entry test_biobambam_bamsormadup -c tests/config/nextflow.config + tags: + - biobambam/bamsormadup + - biobambam + files: + - path: output/biobambam/test.bam + md5sum: 243a77fb0642fd46bb16a4d3432d19dc + - path: output/biobambam/test.metrics.txt + md5sum: 1721879bea1f3888ecd33b35e6ee0e72 + - path: output/biobambam/versions.yml From d2726fcf75063960f06b36d2229a4c0966614108 Mon Sep 17 00:00:00 2001 From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com> Date: Thu, 7 Apr 2022 11:46:34 +0200 Subject: [PATCH 09/15] Update centrifuge/centrifuge (#1495) --- modules/centrifuge/{ => centrifuge}/main.nf | 10 ++++------ modules/centrifuge/{ => centrifuge}/meta.yml | 14 ++------------ tests/config/pytest_modules.yml | 6 +++--- tests/modules/centrifuge/{ => centrifuge}/main.nf | 15 +++++++-------- .../centrifuge/{ => centrifuge}/nextflow.config | 0 .../modules/centrifuge/{ => centrifuge}/test.yml | 12 ++++++------ 6 files changed, 22 insertions(+), 35 deletions(-) rename modules/centrifuge/{ => centrifuge}/main.nf (88%) rename modules/centrifuge/{ => centrifuge}/meta.yml (82%) rename tests/modules/centrifuge/{ => 
centrifuge}/main.nf (66%) rename tests/modules/centrifuge/{ => centrifuge}/nextflow.config (100%) rename tests/modules/centrifuge/{ => centrifuge}/test.yml (51%) diff --git a/modules/centrifuge/main.nf b/modules/centrifuge/centrifuge/main.nf similarity index 88% rename from modules/centrifuge/main.nf rename to modules/centrifuge/centrifuge/main.nf index c9ec377b..3d23fc96 100644 --- a/modules/centrifuge/main.nf +++ b/modules/centrifuge/centrifuge/main.nf @@ -1,4 +1,4 @@ -process CENTRIFUGE { +process CENTRIFUGE_CENTRIFUGE { tag "$meta.id" label 'process_high' @@ -10,7 +10,6 @@ process CENTRIFUGE { input: tuple val(meta), path(reads) path db - val db_name val save_unaligned val save_aligned val sam_format @@ -18,7 +17,6 @@ process CENTRIFUGE { output: tuple val(meta), path('*report.txt') , emit: report tuple val(meta), path('*results.txt') , emit: results - tuple val(meta), path('*kreport.txt') , emit: kreport tuple val(meta), path('*.sam') , optional: true, emit: sam tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped @@ -31,7 +29,6 @@ process CENTRIFUGE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" - def db_name = db.toString().replace(".tar.gz","") def unaligned = '' def aligned = '' if (meta.single_end) { @@ -43,8 +40,10 @@ process CENTRIFUGE { } def sam_output = sam_format ? "--out-fmt 'sam'" : '' """ + ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included + db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'` centrifuge \\ - -x ${db}/${db_name} \\ + -x \$db_name \\ -p $task.cpus \\ $paired \\ --report-file ${prefix}.report.txt \\ @@ -53,7 +52,6 @@ process CENTRIFUGE { $aligned \\ $sam_output \\ $args - centrifuge-kreport -x $db_name ${prefix}.results.txt > ${prefix}.kreport.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/centrifuge/meta.yml b/modules/centrifuge/centrifuge/meta.yml similarity index 82% rename from modules/centrifuge/meta.yml rename to modules/centrifuge/centrifuge/meta.yml index aabb465f..a252c00c 100644 --- a/modules/centrifuge/meta.yml +++ b/modules/centrifuge/centrifuge/meta.yml @@ -1,4 +1,4 @@ -name: centrifuge +name: centrifuge_centrifuge description: Classifies metagenomic sequence data keywords: - classify @@ -25,11 +25,7 @@ input: respectively. - db: type: directory - description: Centrifuge database in .tar.gz format - pattern: "*.tar.gz" - - db_name: - type: string - description: Centrifuge database filenames without the suffix ".cf" + description: Path to directory containing centrifuge database files - save_unaligned: type: value description: If true unmapped fastq files are saved @@ -52,12 +48,6 @@ output: description: | File containing classification results pattern: "*.{results.txt}" - - kreport: - type: file - description: | - File containing kraken-style report from centrifuge - out files. 
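A note on the index-prefix discovery added to centrifuge/centrifuge/main.nf above: instead of taking db_name as a separate input, the process now derives the -x prefix from whichever *.1.cf file sits in the untarred database directory. A minimal sketch of that shell logic, using assumed directory and index file names:

    # fabricate an illustrative centrifuge index layout (names are assumptions)
    mkdir -p db
    touch db/minigut_cf.1.cf db/minigut_cf.2.cf db/minigut_cf.3.cf

    # same lookup as in the module; -not -name "._*" skips macOS "._" metadata files
    db_name=$(find -L db -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//')
    echo "$db_name"    # prints db/minigut_cf, the prefix handed to centrifuge -x

This is what lets the test workflows pass UNTAR.out.untar.map{ it[1] } directly and drop the old db_name value channel.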
- pattern: "*.{kreport.txt}" - fastq_unmapped: type: file description: Unmapped fastq files diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d47b95c4..64779036 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -395,9 +395,9 @@ cellranger/mkref: - modules/cellranger/gtf/** - tests/modules/cellranger/gtf/** -centrifuge: - - modules/centrifuge/** - - tests/modules/centrifuge/** +centrifuge/centrifuge: + - modules/centrifuge/centrifuge/** + - tests/modules/centrifuge/centrifuge/** checkm/lineagewf: - modules/checkm/lineagewf/** diff --git a/tests/modules/centrifuge/main.nf b/tests/modules/centrifuge/centrifuge/main.nf similarity index 66% rename from tests/modules/centrifuge/main.nf rename to tests/modules/centrifuge/centrifuge/main.nf index 37393ce5..7e44bd80 100644 --- a/tests/modules/centrifuge/main.nf +++ b/tests/modules/centrifuge/centrifuge/main.nf @@ -2,37 +2,36 @@ nextflow.enable.dsl = 2 -include { UNTAR } from '../../../modules/untar/main.nf' -include { CENTRIFUGE } from '../../../modules/centrifuge/main.nf' +include { UNTAR } from '../../../../modules/untar/main.nf' +include { CENTRIFUGE_CENTRIFUGE } from '../../../../modules/centrifuge/centrifuge/main.nf' -workflow test_centrifuge_single_end { +workflow test_centrifuge_centrifuge_single_end { input = [ [ id:'test', single_end:true ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] ] db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] - db_name = "minigut_cf" save_unaligned = true save_aligned = false sam_format = false UNTAR ( db ) - CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] },db_name, save_unaligned, save_aligned, sam_format ) + CENTRIFUGE_CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] }, save_unaligned, save_aligned, sam_format ) } -workflow test_centrifuge_paired_end { +workflow test_centrifuge_centrifuge_paired_end { input = [ [ id:'test', single_end:false ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] ] db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] - db_name = "minigut_cf" + //db_name = "minigut_cf" save_unaligned = true save_aligned = false sam_format = false UNTAR ( db ) - CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] }, db_name, save_unaligned, save_aligned, sam_format ) + CENTRIFUGE_CENTRIFUGE ( input, UNTAR.out.untar.map{ it[1] }, save_unaligned, save_aligned, sam_format ) } diff --git a/tests/modules/centrifuge/nextflow.config b/tests/modules/centrifuge/centrifuge/nextflow.config similarity index 100% rename from tests/modules/centrifuge/nextflow.config rename to tests/modules/centrifuge/centrifuge/nextflow.config diff --git a/tests/modules/centrifuge/test.yml b/tests/modules/centrifuge/centrifuge/test.yml similarity index 51% rename from tests/modules/centrifuge/test.yml rename to tests/modules/centrifuge/centrifuge/test.yml index a7b4360b..641ca7ef 100644 --- a/tests/modules/centrifuge/test.yml +++ b/tests/modules/centrifuge/centrifuge/test.yml @@ -1,20 +1,20 @@ -- name: centrifuge test_centrifuge_single_end - command: nextflow run tests/modules/centrifuge -entry test_centrifuge_single_end -c tests/config/nextflow.config +- name: centrifuge centrifuge 
test_centrifuge_centrifuge_single_end + command: nextflow run tests/modules/centrifuge/centrifuge -entry test_centrifuge_centrifuge_single_end -c tests/config/nextflow.config tags: - centrifuge + - centrifuge/centrifuge files: - - path: output/centrifuge/test.kreport.txt - path: output/centrifuge/test.report.txt - path: output/centrifuge/test.results.txt - path: output/centrifuge/test.unmapped.fastq.gz - path: output/centrifuge/versions.yml -- name: centrifuge test_centrifuge_paired_end - command: nextflow run tests/modules/centrifuge -entry test_centrifuge_paired_end -c tests/config/nextflow.config +- name: centrifuge centrifuge test_centrifuge_centrifuge_paired_end + command: nextflow run tests/modules/centrifuge/centrifuge -entry test_centrifuge_centrifuge_paired_end -c tests/config/nextflow.config tags: - centrifuge + - centrifuge/centrifuge files: - - path: output/centrifuge/test.kreport.txt - path: output/centrifuge/test.report.txt - path: output/centrifuge/test.results.txt - path: output/centrifuge/test.unmapped.fastq.1.gz From f07936741656de27060de4a72b1f5292e25d4f98 Mon Sep 17 00:00:00 2001 From: Lucpen Date: Thu, 7 Apr 2022 13:50:58 +0200 Subject: [PATCH 10/15] Picard liftover vcf (#1431) * Building Picard liftovervcf module * Building Picard liftovervcf module_test * Building Picard liftovervcf pytest * Module for picard liftover vcf created * Fixed files after linting test * Fixed trailing whitespace * Checked files with prettier * further formatting with prettier * Fixed test.yml * Fixed input variable names * Changed contain test.liftef.vcf * Changed contain in test.yml test.liftef.vcf * Run prittier * Going back to previous version of test.yml * downgrading picard to 2.26.10 from 2.26.11 * Update modules/picard/liftovervcf/main.nf Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Update modules/picard/liftovervcf/main.nf Print available memory Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Output from .vcf to .vcf.gz * Added spaces to align emit * Update modules/picard/liftovervcf/meta.yml Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Update modules/picard/liftovervcf/meta.yml Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Update modules/picard/liftovervcf/meta.yml Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Removing md5sum test Co-authored-by: jemten Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> Co-authored-by: Maxime U. Garcia --- modules/picard/liftovervcf/main.nf | 49 +++++++++++++++++ modules/picard/liftovervcf/meta.yml | 55 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/picard/liftovervcf/main.nf | 17 ++++++ .../picard/liftovervcf/nextflow.config | 5 ++ tests/modules/picard/liftovervcf/test.yml | 11 ++++ 6 files changed, 141 insertions(+) create mode 100644 modules/picard/liftovervcf/main.nf create mode 100644 modules/picard/liftovervcf/meta.yml create mode 100644 tests/modules/picard/liftovervcf/main.nf create mode 100644 tests/modules/picard/liftovervcf/nextflow.config create mode 100644 tests/modules/picard/liftovervcf/test.yml diff --git a/modules/picard/liftovervcf/main.nf b/modules/picard/liftovervcf/main.nf new file mode 100644 index 00000000..cdbd637e --- /dev/null +++ b/modules/picard/liftovervcf/main.nf @@ -0,0 +1,49 @@ +process PICARD_LIFTOVERVCF { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::picard=2.26.10" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + + input: + tuple val(meta), path(input_vcf) + path dict + path chain + path fasta + + output: + tuple val(meta), path("*lifted.vcf.gz") , emit: vcf_lifted + tuple val(meta), path("*unlifted.vcf.gz"), emit: vcf_unlifted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 1 + if (!task.memory) { + log.info '[Picard LiftoverVcf] Available memory not known - defaulting to 1GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + LiftoverVcf \\ + $args \\ + I=$input_vcf \\ + O=${prefix}.lifted.vcf.gz \\ + CHAIN=$chain \\ + REJECT=${prefix}.unlifted.vcf.gz \\ + R=$fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard LiftoverVcf --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/picard/liftovervcf/meta.yml b/modules/picard/liftovervcf/meta.yml new file mode 100644 index 00000000..55f04963 --- /dev/null +++ b/modules/picard/liftovervcf/meta.yml @@ -0,0 +1,55 @@ +name: picard_liftovervcf +description: convert between genome builds +keywords: + - liftOver + - picard +tools: + - picard: + description: Move annotations from one assembly to another + homepage: https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard + tool_dev_url: https://github.com/broadinstitute/picard + doi: "" + licence: ["MIT"] + +input: + - meta: + type: map + description: Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_vcf: + type: file + description: VCF file + pattern: "*.{vcf,vcf.gz}" + - chain: + type: file + description: The liftover chain file + - fasta: + type: file + description: fasta file + pattern: "*.fasta" + - dict: + type: file + description: dictionary for fasta file + pattern: "*.{dict}" + +output: + - meta: + type: map + description: Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf_lifted: + type: file + description: VCF file containing successfully lifted variants + pattern: "*.{lifted.vcf.gz}" + - vcf_unlifted: + type: file + description: VCF file containing unsuccessfully lifted variants + pattern: "*.{unlifted.vcf.gz}" + +authors: + - "@lucpen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 64779036..364d1f53 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1351,6 +1351,10 @@ picard/fixmateinformation: - modules/picard/fixmateinformation/** - tests/modules/picard/fixmateinformation/** +picard/liftovervcf: + - modules/picard/liftovervcf/** + - tests/modules/picard/liftovervcf/** + picard/markduplicates: - modules/picard/markduplicates/** - tests/modules/picard/markduplicates/** diff --git a/tests/modules/picard/liftovervcf/main.nf b/tests/modules/picard/liftovervcf/main.nf new file mode 100644 index 00000000..8aee8273 --- /dev/null +++ b/tests/modules/picard/liftovervcf/main.nf @@ -0,0 +1,17 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_LIFTOVERVCF } from '../../../../modules/picard/liftovervcf/main.nf' + +workflow test_picard_liftovervcf { + + input_vcf = [ [ id:'test' ], + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true) + ] + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + chain = file(params.test_data['homo_sapiens']['genome']['genome_chain_gz'], checkIfExists: true) + fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + + PICARD_LIFTOVERVCF ( input_vcf, dict, chain, fasta ) +} diff --git a/tests/modules/picard/liftovervcf/nextflow.config b/tests/modules/picard/liftovervcf/nextflow.config new file mode 100644 index 00000000..e1581bb9 --- /dev/null +++ b/tests/modules/picard/liftovervcf/nextflow.config @@ -0,0 +1,5 @@ +process { + ext.args = "WARN_ON_MISSING_CONTIG=true" + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/picard/liftovervcf/test.yml b/tests/modules/picard/liftovervcf/test.yml new file mode 100644 index 00000000..b1b30d5d --- /dev/null +++ b/tests/modules/picard/liftovervcf/test.yml @@ -0,0 +1,11 @@ +- name: picard liftovervcf test_picard_liftovervcf + command: nextflow run tests/modules/picard/liftovervcf -entry test_picard_liftovervcf -c tests/config/nextflow.config + tags: + - picard/liftovervcf + - picard + files: + - path: output/picard/test.lifted.vcf.gz + contains: + - "chr22" + - path: output/picard/test.unlifted.vcf.gz + - path: output/picard/versions.yml From 9ae34a01d1747019fd37753ff4cafb05aec35a2b Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Fri, 8 Apr 2022 11:43:40 +0200 Subject: [PATCH 11/15] Fix Controlfreec: Add stub runs to test single sample input & make conda work with R scripts (#1504) * Fix typo * Add stub runs for testing input without matched normals * Add missing -stub-run * remove empty file checksum tests and change workflow names * test controlfreec naming * fix output file names * fix output file names * fix output file names * fix conda and container path difference for R scripts * update tar version to work with conda * fix version number in docker * try to fix path to script, pretty sure it won't work * try new ways to set path with wildcard * try 
which * add which but with escape * remove comment --- .../controlfreec/assesssignificance/main.nf | 13 ++++++- modules/controlfreec/freec/main.nf | 20 ++++++++++- modules/controlfreec/freec2bed/main.nf | 11 ++++++ modules/controlfreec/freec2circos/main.nf | 11 ++++++ modules/controlfreec/makegraph/main.nf | 14 +++++++- modules/untar/main.nf | 6 ++-- .../controlfreec/assesssignificance/main.nf | 35 ++++++++++++++++++ .../controlfreec/assesssignificance/test.yml | 10 +++++- tests/modules/controlfreec/freec/main.nf | 33 +++++++++++++++++ tests/modules/controlfreec/freec/test.yml | 16 ++++++++- tests/modules/controlfreec/freec2bed/main.nf | 36 ++++++++++++++++++- tests/modules/controlfreec/freec2bed/test.yml | 8 +++++ .../modules/controlfreec/freec2circos/main.nf | 34 ++++++++++++++++++ .../controlfreec/freec2circos/test.yml | 8 +++++ tests/modules/controlfreec/makegraph/main.nf | 35 ++++++++++++++++++ tests/modules/controlfreec/makegraph/test.yml | 10 ++++++ 16 files changed, 291 insertions(+), 9 deletions(-) diff --git a/modules/controlfreec/assesssignificance/main.nf b/modules/controlfreec/assesssignificance/main.nf index f85a3c7f..4bdb00b3 100644 --- a/modules/controlfreec/assesssignificance/main.nf +++ b/modules/controlfreec/assesssignificance/main.nf @@ -21,7 +21,7 @@ process CONTROLFREEC_ASSESSSIGNIFICANCE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - cat /usr/local/bin/assess_significance.R | R --slave --args ${cnvs} ${ratio} + cat \$(which assess_significance.R) | R --slave --args ${cnvs} ${ratio} mv *.p.value.txt ${prefix}.p.value.txt @@ -30,4 +30,15 @@ process CONTROLFREEC_ASSESSSIGNIFICANCE { controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.p.value.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ } diff --git a/modules/controlfreec/freec/main.nf b/modules/controlfreec/freec/main.nf index eb66eeaa..857ffdee 100644 --- a/modules/controlfreec/freec/main.nf +++ b/modules/controlfreec/freec/main.nf @@ -21,7 +21,7 @@ process CONTROLFREEC_FREEC { output: tuple val(meta), path("*_ratio.BedGraph") , emit: bedgraph, optional: true - tuple val(meta), path("*_control.cpn") , emit: control_cpn + tuple val(meta), path("*_control.cpn") , emit: control_cpn, optional: true tuple val(meta), path("*_sample.cpn") , emit: sample_cpn tuple val(meta), path("GC_profile.*.cpn") , emit: gcprofile_cpn, optional:true tuple val(meta), path("*_BAF.txt") , emit: BAF @@ -155,4 +155,22 @@ process CONTROLFREEC_FREEC { controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_ratio.BedGraph + touch ${prefix}_sample.cpn + touch GC_profile.${prefix}.cpn + touch ${prefix}_BAF.txt + touch ${prefix}_CNVs + touch ${prefix}_info.txt + touch ${prefix}_ratio.txt + touch config.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ } diff --git a/modules/controlfreec/freec2bed/main.nf b/modules/controlfreec/freec2bed/main.nf index 880e4716..aefc200e 100644 
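The "add which" bullets in the commit message above refer to replacing the hard-coded cat /usr/local/bin/assess_significance.R (and, further down, makeGraph.R) with cat \$(which assess_significance.R): the fixed path matches where the container image ships the script, while a conda environment installs it under its own bin/, so resolving it through PATH works in both cases. A minimal sketch of the resulting shell behaviour, assuming an environment with Control-FREEC on PATH and placeholder input file names:

    # locate the helper script wherever the active environment installed it
    script=$(which assess_significance.R)

    # equivalent to the module's call; the CNV and ratio file names are placeholders
    cat "$script" | R --slave --args sample_CNVs sample_ratio.txt

Inside the Nextflow script block the command substitution is written as \$(which ...) so that Nextflow passes the dollar sign through to bash instead of treating it as a Groovy variable.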
--- a/modules/controlfreec/freec2bed/main.nf +++ b/modules/controlfreec/freec2bed/main.nf @@ -28,4 +28,15 @@ process CONTROLFREEC_FREEC2BED { controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ } diff --git a/modules/controlfreec/freec2circos/main.nf b/modules/controlfreec/freec2circos/main.nf index 8879d4c0..8f9be300 100644 --- a/modules/controlfreec/freec2circos/main.nf +++ b/modules/controlfreec/freec2circos/main.nf @@ -28,4 +28,15 @@ process CONTROLFREEC_FREEC2CIRCOS { controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.circos.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ } diff --git a/modules/controlfreec/makegraph/main.nf b/modules/controlfreec/makegraph/main.nf index 9a0c7281..a8954d72 100644 --- a/modules/controlfreec/makegraph/main.nf +++ b/modules/controlfreec/makegraph/main.nf @@ -25,12 +25,24 @@ process CONTROLFREEC_MAKEGRAPH { def prefix = task.ext.prefix ?: "${meta.id}" def baf = baf ?: "" """ - cat /usr/local/bin/makeGraph.R | R --slave --args ${args} ${ratio} ${baf} + cat \$(which makeGraph.R) | R --slave --args ${args} ${ratio} ${baf} mv *_BAF.txt.png ${prefix}_BAF.png mv *_ratio.txt.log2.png ${prefix}_ratio.log2.png mv *_ratio.txt.png ${prefix}_ratio.png + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_BAF.png + touch ${prefix}_ratio.log2.png + touch ${prefix}_ratio.png cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/untar/main.nf b/modules/untar/main.nf index 5aa6aa7f..bbfa0bfe 100644 --- a/modules/untar/main.nf +++ b/modules/untar/main.nf @@ -2,10 +2,10 @@ process UNTAR { tag "$archive" label 'process_low' - conda (params.enable_conda ? "conda-forge::tar=1.32" : null) + conda (params.enable_conda ? "conda-forge::tar=1.34" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' : + 'biocontainers/biocontainers:v1.2.0_cv2' }" input: tuple val(meta), path(archive) diff --git a/tests/modules/controlfreec/assesssignificance/main.nf b/tests/modules/controlfreec/assesssignificance/main.nf index f8d8aa1d..e5ed1bf7 100644 --- a/tests/modules/controlfreec/assesssignificance/main.nf +++ b/tests/modules/controlfreec/assesssignificance/main.nf @@ -40,3 +40,38 @@ workflow test_controlfreec_assesssignificance { sig_in = CONTROLFREEC_FREEC.out.CNV.join(CONTROLFREEC_FREEC.out.ratio) CONTROLFREEC_ASSESSSIGNIFICANCE ( sig_in ) } + +workflow test_controlfreec_assesssignificance_single { + + input = [ + [ id:'test', single_end:false, sex:'XX' ], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), + [],[],[],[] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + + chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ] + target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + UNTAR(chrfiles) + CONTROLFREEC_FREEC (input, + fasta, + fai, + [], + dbsnp, + dbsnp_tbi, + UNTAR.out.untar.map{ it[1] }, + [], + target_bed, + [] + ) + + sig_in = CONTROLFREEC_FREEC.out.CNV.join(CONTROLFREEC_FREEC.out.ratio) + CONTROLFREEC_ASSESSSIGNIFICANCE ( sig_in ) +} diff --git a/tests/modules/controlfreec/assesssignificance/test.yml b/tests/modules/controlfreec/assesssignificance/test.yml index f8393330..19e54acf 100644 --- a/tests/modules/controlfreec/assesssignificance/test.yml +++ b/tests/modules/controlfreec/assesssignificance/test.yml @@ -7,4 +7,12 @@ - path: output/controlfreec/test.p.value.txt md5sum: 44e23b916535fbc1a3f47b57fad292df - path: output/controlfreec/versions.yml - md5sum: 0aa42fed10d61e4570fe1e0e83ffe932 + +- name: controlfreec assesssignificance test_controlfreec_assesssignificance_single + command: nextflow run tests/modules/controlfreec/assesssignificance -entry test_controlfreec_assesssignificance_single -c tests/config/nextflow.config -stub-run + tags: + - controlfreec/assesssignificance + - controlfreec + files: + - path: output/controlfreec/test.p.value.txt + - path: output/controlfreec/versions.yml diff --git a/tests/modules/controlfreec/freec/main.nf b/tests/modules/controlfreec/freec/main.nf index d14c8f65..1f4a069b 100644 --- a/tests/modules/controlfreec/freec/main.nf +++ b/tests/modules/controlfreec/freec/main.nf @@ -36,3 +36,36 @@ workflow test_controlfreec_freec { [] ) } + +workflow test_controlfreec_freec_single { + + input = [ + [ id:'test2', single_end:false, sex:'XX' ], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), + [],[],[],[] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = 
file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + + chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ] + target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + UNTAR(chrfiles) + CONTROLFREEC_FREEC (input, + fasta, + fai, + [], + dbsnp, + dbsnp_tbi, + UNTAR.out.untar.map{ it[1] }, + [], + target_bed, + [] + ) +} + diff --git a/tests/modules/controlfreec/freec/test.yml b/tests/modules/controlfreec/freec/test.yml index d50fc063..1bd4e3a4 100644 --- a/tests/modules/controlfreec/freec/test.yml +++ b/tests/modules/controlfreec/freec/test.yml @@ -20,4 +20,18 @@ - path: output/controlfreec/test2.mpileup.gz_sample.cpn md5sum: c80dad58a77b1d7ba6d273999f4b4b4b - path: output/controlfreec/versions.yml - md5sum: 3ab250a2ab3be22628124c7c65324651 + +- name: controlfreec test_controlfreec_freec_single + command: nextflow run tests/modules/controlfreec/freec -entry test_controlfreec_freec_single -c tests/config/nextflow.config -stub-run + tags: + - controlfreec + - controlfreec/freec + files: + - path: output/controlfreec/config.txt + - path: output/controlfreec/test2_BAF.txt + - path: output/controlfreec/test2_CNVs + - path: output/controlfreec/test2_info.txt + - path: output/controlfreec/test2_ratio.BedGraph + - path: output/controlfreec/test2_ratio.txt + - path: output/controlfreec/test2_sample.cpn + - path: output/controlfreec/versions.yml diff --git a/tests/modules/controlfreec/freec2bed/main.nf b/tests/modules/controlfreec/freec2bed/main.nf index df121832..c1b0f04e 100644 --- a/tests/modules/controlfreec/freec2bed/main.nf +++ b/tests/modules/controlfreec/freec2bed/main.nf @@ -8,7 +8,7 @@ include { UNTAR } from '../../../../modules/untar/main.nf' workflow test_controlfreec_freec2bed { - input = [ + input = [ [ id:'test', single_end:false, sex:'XX' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_mpileup'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), @@ -39,3 +39,37 @@ workflow test_controlfreec_freec2bed { CONTROLFREEC_FREEC2BED ( CONTROLFREEC_FREEC.out.ratio ) } + +workflow test_controlfreec_freec2bed_single { + + input = [ + [ id:'test', single_end:false, sex:'XX' ], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), + [],[],[],[] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + + chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ] + target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + UNTAR(chrfiles) + CONTROLFREEC_FREEC (input, + fasta, + fai, + [], + dbsnp, + dbsnp_tbi, + UNTAR.out.untar.map{ it[1] }, + [], + target_bed, + [] + ) + + 
CONTROLFREEC_FREEC2BED ( CONTROLFREEC_FREEC.out.ratio ) +} diff --git a/tests/modules/controlfreec/freec2bed/test.yml b/tests/modules/controlfreec/freec2bed/test.yml index 0198bac6..9abb3a54 100644 --- a/tests/modules/controlfreec/freec2bed/test.yml +++ b/tests/modules/controlfreec/freec2bed/test.yml @@ -6,3 +6,11 @@ files: - path: output/controlfreec/test.bed md5sum: abe10b7ce94ba903503e697394c17297 + +- name: controlfreec freec2bed test_controlfreec_freec2bed_single + command: nextflow run tests/modules/controlfreec/freec2bed -entry test_controlfreec_freec2bed_single -c tests/config/nextflow.config -stub-run + tags: + - controlfreec/freec2bed + - controlfreec + files: + - path: output/controlfreec/test.bed diff --git a/tests/modules/controlfreec/freec2circos/main.nf b/tests/modules/controlfreec/freec2circos/main.nf index 9b655f0e..6b34edb6 100644 --- a/tests/modules/controlfreec/freec2circos/main.nf +++ b/tests/modules/controlfreec/freec2circos/main.nf @@ -39,3 +39,37 @@ workflow test_controlfreec_freec2circos { CONTROLFREEC_FREEC2CIRCOS ( CONTROLFREEC_FREEC.out.ratio ) } + +workflow test_controlfreec_freec2circos_single { + + input = [ + [ id:'test', single_end:false, sex:'XX' ], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), + [],[],[],[] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + + chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ] + target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + UNTAR(chrfiles) + CONTROLFREEC_FREEC (input, + fasta, + fai, + [], + dbsnp, + dbsnp_tbi, + UNTAR.out.untar.map{ it[1] }, + [], + target_bed, + [] + ) + + CONTROLFREEC_FREEC2CIRCOS ( CONTROLFREEC_FREEC.out.ratio ) +} diff --git a/tests/modules/controlfreec/freec2circos/test.yml b/tests/modules/controlfreec/freec2circos/test.yml index 5758a828..c29111de 100644 --- a/tests/modules/controlfreec/freec2circos/test.yml +++ b/tests/modules/controlfreec/freec2circos/test.yml @@ -6,3 +6,11 @@ files: - path: output/controlfreec/test.circos.txt md5sum: 19cf35f2c36b46f717dc8342b8a5a645 + +- name: controlfreec freec2circos test_controlfreec_freec2circos_single + command: nextflow run tests/modules/controlfreec/freec2circos -entry test_controlfreec_freec2circos_single -c tests/config/nextflow.config -stub-run + tags: + - controlfreec + - controlfreec/freec2circos + files: + - path: output/controlfreec/test.circos.txt diff --git a/tests/modules/controlfreec/makegraph/main.nf b/tests/modules/controlfreec/makegraph/main.nf index ffea3d99..543216e1 100644 --- a/tests/modules/controlfreec/makegraph/main.nf +++ b/tests/modules/controlfreec/makegraph/main.nf @@ -40,3 +40,38 @@ workflow test_controlfreec_makegraph { makegraph_in = CONTROLFREEC_FREEC.out.ratio.join(CONTROLFREEC_FREEC.out.BAF) CONTROLFREEC_MAKEGRAPH ( makegraph_in ) } + +workflow test_controlfreec_makegraph_single { + + input = [ + [ id:'test', single_end:false, sex:'XX' ], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true), + 
[],[],[],[] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + + chrfiles = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true) ] + target_bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + UNTAR(chrfiles) + CONTROLFREEC_FREEC (input, + fasta, + fai, + [], + dbsnp, + dbsnp_tbi, + UNTAR.out.untar.map{ it[1] }, + [], + target_bed, + [] + ) + + makegraph_in = CONTROLFREEC_FREEC.out.ratio.join(CONTROLFREEC_FREEC.out.BAF) + CONTROLFREEC_MAKEGRAPH ( makegraph_in ) +} diff --git a/tests/modules/controlfreec/makegraph/test.yml b/tests/modules/controlfreec/makegraph/test.yml index 21e78766..02d1a165 100644 --- a/tests/modules/controlfreec/makegraph/test.yml +++ b/tests/modules/controlfreec/makegraph/test.yml @@ -10,3 +10,13 @@ md5sum: b3c7916b1b4951a0cc3da20d8e9e0262 - path: output/controlfreec/test_ratio.png md5sum: 1435b29536b3b1555b4c423f8f4fb000 + +- name: controlfreec makegraph test_controlfreec_makegraph_single + command: nextflow run tests/modules/controlfreec/makegraph -entry test_controlfreec_makegraph_single -c tests/config/nextflow.config -stub-run + tags: + - controlfreec + - controlfreec/makegraph + files: + - path: output/controlfreec/test_BAF.png + - path: output/controlfreec/test_ratio.log2.png + - path: output/controlfreec/test_ratio.png From f57f085912a2b158eb224c21aeef45722a797aa6 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 8 Apr 2022 14:41:08 +0200 Subject: [PATCH 12/15] new tool snap-aligner/index (#1506) * add snapaligner/index * output fixes * fix outputs * fix tests * update inputs * fix more bugs * fix linting * Update modules/snapaligner/index/main.nf Co-authored-by: James A. Fellows Yates * Update modules/snapaligner/index/main.nf Co-authored-by: James A. Fellows Yates * fix comments * fix indents * fix escaping Co-authored-by: James A. Fellows Yates --- modules/snapaligner/index/main.nf | 59 +++++++++++++++++++ modules/snapaligner/index/meta.yml | 39 ++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/snapaligner/index/main.nf | 9 +++ .../modules/snapaligner/index/nextflow.config | 5 ++ tests/modules/snapaligner/index/test.yml | 13 ++++ 6 files changed, 129 insertions(+) create mode 100644 modules/snapaligner/index/main.nf create mode 100644 modules/snapaligner/index/meta.yml create mode 100644 tests/modules/snapaligner/index/main.nf create mode 100644 tests/modules/snapaligner/index/nextflow.config create mode 100644 tests/modules/snapaligner/index/test.yml diff --git a/modules/snapaligner/index/main.nf b/modules/snapaligner/index/main.nf new file mode 100644 index 00000000..6dc2c958 --- /dev/null +++ b/modules/snapaligner/index/main.nf @@ -0,0 +1,59 @@ +process SNAPALIGNER_INDEX { + tag '$fasta' + label 'process_high' + + conda (params.enable_conda ? "bioconda::snap-aligner=2.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/snap-aligner:2.0.1--hd03093a_1': + 'quay.io/biocontainers/snap-aligner:2.0.1--hd03093a_1' }" + + input: + path fasta + path altcontigfile + path nonaltcontigfile + path altliftoverfile + + output: + path "snap/*" ,emit: index + path "versions.yml" ,emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def altcontigfile_arg = altcontigfile ? '-altContigFile ' + altcontigfile : '' + def nonaltcontigfile_arg = nonaltcontigfile ? '-nonAltContigFile ' + nonaltcontigfile : '' + def altliftoverfile_arg = altliftoverfile ? '-altLiftoverFile ' + altliftoverfile : '' + """ + mkdir snap + + snap-aligner \\ + index \\ + $fasta \\ + snap \\ + -t${task.cpus} \\ + $altcontigfile_arg \\ + $nonaltcontigfile_arg \\ + $altliftoverfile_arg \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snapaligner: \$(snap-aligner 2>&1| head -n 1 | sed 's/^.*version //') + END_VERSIONS + """ + stub: + """ + mkdir snap + echo "Genome" > snap/Genome + echo "GenomeIndex" > snap/GenomeIndex + echo "GenomeIndexHash" > snap/GenomeIndexHash + echo "OverflowTable" > snap/OverflowTable + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snapaligner: \$(snap-aligner 2>&1| head -n 1 | sed 's/^.*version //;s/\.\$//') + END_VERSIONS + """ +} diff --git a/modules/snapaligner/index/meta.yml b/modules/snapaligner/index/meta.yml new file mode 100644 index 00000000..6d5e0f19 --- /dev/null +++ b/modules/snapaligner/index/meta.yml @@ -0,0 +1,39 @@ +name: "snapaligner_index" +description: Create a SNAP index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - "snapaligner": + description: "Scalable Nucleotide Alignment Program -- a fast and accurate read aligner for high-throughput sequencing data" + homepage: "http://snap.cs.berkeley.edu" + documentation: "https://1drv.ms/b/s!AhuEg_0yZD86hcpblUt-muHKYsG8fA?e=R8ogug" + tool_dev_url: "https://github.com/amplab/snap" + doi: "10.1101/2021.11.23.469039" + licence: "['Apache v2']" +input: + - fasta: + type: file + description: Input genome fasta file + - altcontigfile: + type: file + description: Optional file with a list of alt contig names, one per line. + - nonaltcontigfile: + type: file + description: Optional file that contains a list of contigs (one per line) that will not be marked ALT regardless of size. + - altliftoverfile: + type: file + description: Optional file containing ALT-to-REF mappings (SAM format). e.g., hs38DH.fa.alt from bwa-kit. 
+output: + - index: + type: file + description: SNAP genome index files + pattern: "{Genome,GenomeIndex,GenomeIndexHash,OverflowTable}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 364d1f53..31f62c78 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1663,6 +1663,10 @@ sistr: - modules/sistr/** - tests/modules/sistr/** +snapaligner/index: + - modules/snapaligner/index/** + - tests/modules/snapaligner/index/** + snpdists: - modules/snpdists/** - tests/modules/snpdists/** diff --git a/tests/modules/snapaligner/index/main.nf b/tests/modules/snapaligner/index/main.nf new file mode 100644 index 00000000..4cebb876 --- /dev/null +++ b/tests/modules/snapaligner/index/main.nf @@ -0,0 +1,9 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNAPALIGNER_INDEX } from '../../../../modules/snapaligner/index/main.nf' + +workflow test_snapaligner_index { + SNAPALIGNER_INDEX ( file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),[],[],[]) +} diff --git a/tests/modules/snapaligner/index/nextflow.config b/tests/modules/snapaligner/index/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/snapaligner/index/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/snapaligner/index/test.yml b/tests/modules/snapaligner/index/test.yml new file mode 100644 index 00000000..2c4b4935 --- /dev/null +++ b/tests/modules/snapaligner/index/test.yml @@ -0,0 +1,13 @@ +- name: snapaligner index test_snapaligner_index + command: nextflow run tests/modules/snapaligner/index -entry test_snapaligner_index -c tests/config/nextflow.config + tags: + - snapaligner/index + - snapaligner + files: + - path: output/snapaligner/snap/Genome + md5sum: 7e189c954142ba37460332b467e34ed4 + - path: output/snapaligner/snap/GenomeIndex + md5sum: 298da8bcb1134f7b24379a792a7a46f8 + - path: output/snapaligner/snap/GenomeIndexHash + - path: output/snapaligner/snap/OverflowTable + - path: output/snapaligner/versions.yml From e19a9a2474c6609875b49d8140a7264e21a1beee Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 8 Apr 2022 15:54:15 +0200 Subject: [PATCH 13/15] new tool: staden_io_lib (#1499) * new tool: staden_io_lib * update docker containers * add test.yml * add fai index input * typo * fix version.yml * update md5sum * omit md5sum for cram * move scramble to submodule * add missing in/output * remove some comments Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> --- modules/stadeniolib/scramble/main.nf | 61 +++++++++++++++++++ modules/stadeniolib/scramble/meta.yml | 58 ++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/stadeniolib/scramble/main.nf | 15 +++++ .../stadeniolib/scramble/nextflow.config | 5 ++ tests/modules/stadeniolib/scramble/test.yml | 7 +++ 6 files changed, 150 insertions(+) create mode 100644 modules/stadeniolib/scramble/main.nf create mode 100644 modules/stadeniolib/scramble/meta.yml create mode 100644 tests/modules/stadeniolib/scramble/main.nf create mode 100644 tests/modules/stadeniolib/scramble/nextflow.config create mode 100644 tests/modules/stadeniolib/scramble/test.yml diff --git 
a/modules/stadeniolib/scramble/main.nf b/modules/stadeniolib/scramble/main.nf new file mode 100644 index 00000000..e24fb2cb --- /dev/null +++ b/modules/stadeniolib/scramble/main.nf @@ -0,0 +1,61 @@ +process STADENIOLIB_SCRAMBLE { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::staden_io_lib=1.14.14" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/staden_io_lib:1.14.14--h0d9da7e_3' : + 'quay.io/biocontainers/staden_io_lib:1.14.14--h0d9da7e_3' }" + + input: + tuple val(meta), path(reads) + path(fasta) + path(fai) + path(gzi) + + output: + tuple val(meta), path("*.cram") ,emit: cram + path "*.gzi" ,emit: gzi, optional: true + path "versions.yml" ,emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def inputformat = reads.getExtension() + def outputformat = "cram" + if (args.contains("-O sam")) { + outputformat = "sam" + } else if (args.contains("-O bam")) { + outputformat = "bam" + } + + def reference = fasta && fai ? "--r ${fasta}" : "" + if (outputformat == "cram" && !reference) { + error "Cannot convert to CRAM without a reference" + } + + def gz_index = gzi ? "--g ${gzi}" : "" + if (outputformat == "cram" || outputformat == "sam") { + gz_index = "" + log.warn "Cannot use gzip index for CRAM or SAM output" + } + + """ + scramble \\ + $args \\ + -I ${inputformat} \\ + $reference \\ + -t $task.cpus \\ + ${reads} \\ + ${prefix}.${outputformat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stadeniolib: \$(echo \$(scramble -h | head -n 1 |sed 's/^.*version //')) + END_VERSIONS + """ +} diff --git a/modules/stadeniolib/scramble/meta.yml b/modules/stadeniolib/scramble/meta.yml new file mode 100644 index 00000000..7e53a1b4 --- /dev/null +++ b/modules/stadeniolib/scramble/meta.yml @@ -0,0 +1,58 @@ +name: "stadeniolib_scramble" +description: Advanced sequence file format conversions +keywords: + - sam + - bam + - cram + - compression +tools: + - "scramble": + description: "Staden Package 'io_lib' (sometimes referred to as libstaden-read by distributions). This contains code for reading and writing a variety of Bioinformatics / DNA Sequence formats." + homepage: "https://github.com/jkbonfield/io_lib" + documentation: "https://github.com/jkbonfield/io_lib/blob/master/README.md" + tool_dev_url: "https://github.com/jkbonfield/io_lib" + licence: "['BSD']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta}" + - fai: + type: file + description: FASTA index file from samtools faidx + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for BAM inputs + pattern: "*.gzi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Converted reads + pattern: "*.{sam, bam, cram}" + - gzi: + type: Optional file + description: gzip index file for BAM outputs + pattern: ".{bam.gzi}" +authors: + - "@matthdsm" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 31f62c78..2b99f835 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1707,6 +1707,10 @@ ssuissero: - modules/ssuissero/** - tests/modules/ssuissero/** +stadeniolib/scramble: + - modules/stadeniolib/scramble/** + - tests/modules/stadeniolib/scramble/** + staphopiasccmec: - modules/staphopiasccmec/** - tests/modules/staphopiasccmec/** diff --git a/tests/modules/stadeniolib/scramble/main.nf b/tests/modules/stadeniolib/scramble/main.nf new file mode 100644 index 00000000..d29c6dd8 --- /dev/null +++ b/tests/modules/stadeniolib/scramble/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { STADENIOLIB_SCRAMBLE } from '../../../../modules/stadeniolib/scramble/main.nf' + +workflow test_stadeniolib { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + STADENIOLIB_SCRAMBLE ( input, file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), []) +} diff --git a/tests/modules/stadeniolib/scramble/nextflow.config b/tests/modules/stadeniolib/scramble/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/stadeniolib/scramble/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/stadeniolib/scramble/test.yml b/tests/modules/stadeniolib/scramble/test.yml new file mode 100644 index 00000000..cea6fb70 --- /dev/null +++ b/tests/modules/stadeniolib/scramble/test.yml @@ -0,0 +1,7 @@ +- name: stadeniolib test_stadeniolib + command: nextflow run tests/modules/stadeniolib -entry test_stadeniolib -c tests/config/nextflow.config + tags: + - stadeniolib + files: + - path: output/stadeniolib/test.cram + - path: output/stadeniolib/versions.yml From d4160c669b1f7faad3177a847c53516ac932b0c8 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Fri, 8 Apr 2022 16:02:10 +0200 Subject: [PATCH 14/15] Tool/crosscheckfingerprints (#1505) * first commit * first commit * update test.yml * update test.yml * Update modules/picard/crosscheckfingerprints/main.nf Co-authored-by: Jose Espinosa-Carrasco * Update modules/picard/crosscheckfingerprints/main.nf Co-authored-by: Jose Espinosa-Carrasco * add support for vcf haplotype maps * update test * update test data config, use test data * fix exit code * Update modules/picard/crosscheckfingerprints/main.nf Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * Update modules/picard/crosscheckfingerprints/main.nf Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> * remove unused stub Co-authored-by: Jose Espinosa-Carrasco Co-authored-by: Sateesh Peri <33637490+sateeshperi@users.noreply.github.com> --- modules/picard/crosscheckfingerprints/main.nf | 51 ++++++++++++++++++ 
.../picard/crosscheckfingerprints/meta.yml | 53 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 1 + .../picard/crosscheckfingerprints/main.nf | 14 +++++ .../crosscheckfingerprints/nextflow.config | 5 ++ .../picard/crosscheckfingerprints/test.yml | 8 +++ 7 files changed, 136 insertions(+) create mode 100644 modules/picard/crosscheckfingerprints/main.nf create mode 100644 modules/picard/crosscheckfingerprints/meta.yml create mode 100644 tests/modules/picard/crosscheckfingerprints/main.nf create mode 100644 tests/modules/picard/crosscheckfingerprints/nextflow.config create mode 100644 tests/modules/picard/crosscheckfingerprints/test.yml diff --git a/modules/picard/crosscheckfingerprints/main.nf b/modules/picard/crosscheckfingerprints/main.nf new file mode 100644 index 00000000..b3dface5 --- /dev/null +++ b/modules/picard/crosscheckfingerprints/main.nf @@ -0,0 +1,51 @@ +process PICARD_CROSSCHECKFINGERPRINTS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.26.10" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + + input: + tuple val(meta), path(input1) + path input2 + path haplotype_map + + output: + tuple val(meta), path("*.crosscheck_metrics.txt"), emit: crosscheck_metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def input1_string = input1.join(" --INPUT ") + def input2_string = input2 ? "--SECOND_INPUT " + input2.join(" --SECOND_INPUT ") : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CrosscheckFingerprints] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CrosscheckFingerprints \\ + $args \\ + --NUM_THREADS ${task.cpus} \\ + --INPUT $input1_string \\ + $input2_string \\ + --HAPLOTYPE_MAP ${haplotype_map} \\ + --OUTPUT ${prefix}.crosscheck_metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$( picard CrosscheckFingerprints --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d: ) + END_VERSIONS + """ +} diff --git a/modules/picard/crosscheckfingerprints/meta.yml b/modules/picard/crosscheckfingerprints/meta.yml new file mode 100644 index 00000000..4f2aff5d --- /dev/null +++ b/modules/picard/crosscheckfingerprints/meta.yml @@ -0,0 +1,53 @@ +name: "picard_crosscheckfingerprints" +description: Checks that all data in the set of input files appear to come from the same individual +keywords: + - alignment + - metrics + - statistics + - fingerprint + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + tool_dev_url: https://github.com/broadinstitute/picard/ + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input1: + type: file + description: List containing 1 or more bam/vcf files or a file containing filepaths + pattern: "*.{bam,vcf,vcf.gz,txt,fofn}" + - input2: + type: file + description: Optional list containing 1 or more bam/vcf files or a file containing filepaths + pattern: "*.{bam,vcf,vcf.gz,txt,fofn}" + - haplotype_map: + type: file + description: Haplotype map file + pattern: "*.{txt,vcf,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - crosscheck_metrics: + type: file + description: Metrics created by crosscheckfingerprints + pattern: "*.{crosscheck_metrics.txt}" + +authors: + - "@matthdsm" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2b99f835..c0e84cbc 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1343,6 +1343,10 @@ picard/createsequencedictionary: - modules/picard/createsequencedictionary/** - tests/modules/picard/createsequencedictionary/** +picard/crosscheckfingerprints: + - modules/picard/crosscheckfingerprints/** + - tests/modules/picard/crosscheckfingerprints/** + picard/filtersamreads: - modules/picard/filtersamreads/** - tests/modules/picard/filtersamreads/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 836604b8..1a5c377c 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -161,6 +161,7 @@ params { gnomad_r2_1_1_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" mills_and_1000g_indels_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" mills_and_1000g_indels_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi" + haplotype_map = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/haplotype_map.txt" index_salmon = "${test_data_dir}/genomics/homo_sapiens/genome/index/salmon" repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json" diff --git a/tests/modules/picard/crosscheckfingerprints/main.nf b/tests/modules/picard/crosscheckfingerprints/main.nf new file mode 100644 index 00000000..55ddb5c5 --- /dev/null +++ b/tests/modules/picard/crosscheckfingerprints/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { PICARD_CROSSCHECKFINGERPRINTS } from '../../../../modules/picard/crosscheckfingerprints/main.nf' + +workflow test_picard_crosscheckfingerprints { + + input = [ + [ id:'test', single_end:false ], // meta map + [file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)], + ] + PICARD_CROSSCHECKFINGERPRINTS ( input,[], file(params.test_data['homo_sapiens']['genome']['haplotype_map'], checkIfExists: true)) +} diff --git a/tests/modules/picard/crosscheckfingerprints/nextflow.config b/tests/modules/picard/crosscheckfingerprints/nextflow.config new file mode 100644 index 00000000..aa696290 --- /dev/null +++ b/tests/modules/picard/crosscheckfingerprints/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: PICARD_CROSSCHECKFINGERPRINTS {ext.args = "--EXIT_CODE_WHEN_MISMATCH 0"} +} diff --git a/tests/modules/picard/crosscheckfingerprints/test.yml b/tests/modules/picard/crosscheckfingerprints/test.yml new file mode 100644 index 00000000..534c206d --- /dev/null +++ b/tests/modules/picard/crosscheckfingerprints/test.yml @@ -0,0 +1,8 @@ +- name: "picard crosscheckfingerprints" + command: nextflow run ./tests/modules/picard/crosscheckfingerprints -entry test_picard_crosscheckfingerprints -c ./tests/config/nextflow.config -c ./tests/modules/picard/crosscheckfingerprints/nextflow.config + tags: + - "picard" + - "picard/crosscheckfingerprints" + files: + - path: "output/picard/test.crosscheck_metrics.txt" + - path: output/picard/versions.yml From 8dc680d3b334c6622d1edfa3b97d05dd318371e0 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Sat, 9 Apr 2022 11:13:47 -0500 Subject: [PATCH 15/15] Update dsh-bio to version 2.0.8. (#1483) --- modules/dshbio/exportsegments/main.nf | 6 +++--- modules/dshbio/filterbed/main.nf | 6 +++--- modules/dshbio/filtergff3/main.nf | 6 +++--- modules/dshbio/splitbed/main.nf | 6 +++--- modules/dshbio/splitgff3/main.nf | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/dshbio/exportsegments/main.nf b/modules/dshbio/exportsegments/main.nf index 49442f81..03d0e91a 100644 --- a/modules/dshbio/exportsegments/main.nf +++ b/modules/dshbio/exportsegments/main.nf @@ -2,10 +2,10 @@ process DSHBIO_EXPORTSEGMENTS { tag "${meta.id}" label 'process_medium' - conda (params.enable_conda ? "bioconda::dsh-bio=2.0.7" : null) + conda (params.enable_conda ? "bioconda::dsh-bio=2.0.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.7--hdfd78af_0' : - 'quay.io/biocontainers/dsh-bio:2.0.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.8--hdfd78af_0' : + 'quay.io/biocontainers/dsh-bio:2.0.8--hdfd78af_0' }" input: tuple val(meta), path(gfa) diff --git a/modules/dshbio/filterbed/main.nf b/modules/dshbio/filterbed/main.nf index 7e3da24e..7a0a4d86 100644 --- a/modules/dshbio/filterbed/main.nf +++ b/modules/dshbio/filterbed/main.nf @@ -2,10 +2,10 @@ process DSHBIO_FILTERBED { tag "${meta.id}" label 'process_medium' - conda (params.enable_conda ? "bioconda::dsh-bio=2.0.7" : null) + conda (params.enable_conda ? "bioconda::dsh-bio=2.0.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.7--hdfd78af_0' : - 'quay.io/biocontainers/dsh-bio:2.0.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.8--hdfd78af_0' : + 'quay.io/biocontainers/dsh-bio:2.0.8--hdfd78af_0' }" input: tuple val(meta), path(bed) diff --git a/modules/dshbio/filtergff3/main.nf b/modules/dshbio/filtergff3/main.nf index 0539bbe0..c6736a49 100644 --- a/modules/dshbio/filtergff3/main.nf +++ b/modules/dshbio/filtergff3/main.nf @@ -2,10 +2,10 @@ process DSHBIO_FILTERGFF3 { tag "${meta.id}" label 'process_medium' - conda (params.enable_conda ? "bioconda::dsh-bio=2.0.7" : null) + conda (params.enable_conda ? "bioconda::dsh-bio=2.0.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.7--hdfd78af_0' : - 'quay.io/biocontainers/dsh-bio:2.0.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.8--hdfd78af_0' : + 'quay.io/biocontainers/dsh-bio:2.0.8--hdfd78af_0' }" input: tuple val(meta), path(gff3) diff --git a/modules/dshbio/splitbed/main.nf b/modules/dshbio/splitbed/main.nf index 824c7e4d..9268b5dc 100644 --- a/modules/dshbio/splitbed/main.nf +++ b/modules/dshbio/splitbed/main.nf @@ -2,10 +2,10 @@ process DSHBIO_SPLITBED { tag "${meta.id}" label 'process_medium' - conda (params.enable_conda ? "bioconda::dsh-bio=2.0.7" : null) + conda (params.enable_conda ? "bioconda::dsh-bio=2.0.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.7--hdfd78af_0' : - 'quay.io/biocontainers/dsh-bio:2.0.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.8--hdfd78af_0' : + 'quay.io/biocontainers/dsh-bio:2.0.8--hdfd78af_0' }" input: tuple val(meta), path(bed) diff --git a/modules/dshbio/splitgff3/main.nf b/modules/dshbio/splitgff3/main.nf index 424bc368..db887bd6 100644 --- a/modules/dshbio/splitgff3/main.nf +++ b/modules/dshbio/splitgff3/main.nf @@ -2,10 +2,10 @@ process DSHBIO_SPLITGFF3 { tag "${meta.id}" label 'process_medium' - conda (params.enable_conda ? "bioconda::dsh-bio=2.0.7" : null) + conda (params.enable_conda ? "bioconda::dsh-bio=2.0.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.7--hdfd78af_0' : - 'quay.io/biocontainers/dsh-bio:2.0.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/dsh-bio:2.0.8--hdfd78af_0' : + 'quay.io/biocontainers/dsh-bio:2.0.8--hdfd78af_0' }" input: tuple val(meta), path(gff3)
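The controlfreec changes earlier in this series follow Nextflow's stub pattern: each process gains a stub: block, and the new test.yml entries run the tests with -stub-run so that only the stub commands (typically touch of the expected outputs) are executed, which is why those entries list output paths without md5sums. As a minimal, hypothetical sketch of that pattern (the process name, tool command, and file names below are illustrative placeholders, not part of any module in these patches), a stubbed nf-core-style process looks roughly like this:

process EXAMPLE_TOOL {
    tag "$meta.id"
    label 'process_low'

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.summary.txt"), emit: summary
    path "versions.yml"                   , emit: versions

    script:
    // Real command, run during normal executions of the pipeline or test.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    example_tool ${reads} > ${prefix}.summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        example_tool: \$(example_tool --version)
    END_VERSIONS
    """

    stub:
    // Cheap stand-in used when Nextflow is invoked with -stub-run:
    // it only creates empty files matching the declared output globs.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.summary.txt
    touch versions.yml
    """
}

Running such a test with -stub-run exercises only the stub block, so the declared outputs exist and the channel wiring can be checked without the runtime cost of the real tool.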