From abe025677cdd805cc93032341ab19885473c1a07 Mon Sep 17 00:00:00 2001
From: Francesco L <53608000+lescai@users.noreply.github.com>
Date: Thu, 21 Apr 2022 14:33:59 +0200
Subject: [PATCH 01/10] update to kraken2: breaking change - output channels
 renamed (#1525)

* updated kraken2 module to include optional classification of each input reads, and make fastq outputs optional
NB: this is a breaking change, because the output channels have been renamed as a consequence of changes

* updated yml

* pigz command made optional, in order to be executed only if fastq of classified/unclassified reads are saved

* updated test yaml file for kraken2

* fixed TODOs and renamed variables and outputs

* untar in conda cannot keep same md5sum of version, and therefore md5sum check removed

* improved description of the options

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/kraken2/kraken2/main.nf        | 23 ++++++++++-----
 modules/kraken2/kraken2/meta.yml       | 25 ++++++++++++----
 tests/modules/kraken2/kraken2/main.nf  | 14 +++++++--
 tests/modules/kraken2/kraken2/test.yml | 40 +++++++++++++++++---------
 4 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/modules/kraken2/kraken2/main.nf b/modules/kraken2/kraken2/main.nf
index 3ec5df52..d4000233 100644
--- a/modules/kraken2/kraken2/main.nf
+++ b/modules/kraken2/kraken2/main.nf
@@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
     input:
     tuple val(meta), path(reads)
     path  db
+    val save_output_fastqs
+    val save_reads_assignment
 
     output:
-    tuple val(meta), path('*classified*')  , emit: classified
-    tuple val(meta), path('*unclassified*'), emit: unclassified
-    tuple val(meta), path('*report.txt')   , emit: txt
-    path "versions.yml"                    , emit: versions
+    tuple val(meta), path('*.classified{.,_}*')     , optional:true, emit: classified_reads_fastq      // dot-anchored so "unclassified"/"classifiedreads" files are excluded
+    tuple val(meta), path('*.unclassified{.,_}*')   , optional:true, emit: unclassified_reads_fastq    // <prefix>.unclassified.* or <prefix>.unclassified_N.*
+    tuple val(meta), path('*classifiedreads.txt')   , optional:true, emit: classified_reads_assignment // per-read assignments written via --output
+    tuple val(meta), path('*report.txt')                           , emit: report
+    path "versions.yml"                                            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
     def paired       = meta.single_end ? "" : "--paired"
     def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
     def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" // without --output kraken2 prints every read assignment to stdout
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
+
     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
         $paired \\
         $args \\
         $reads
 
-    pigz -p $task.cpus *.fastq
+    $compress_reads_command
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/kraken2/kraken2/meta.yml b/modules/kraken2/kraken2/meta.yml
index 9d6a3855..7129fe3a 100644
--- a/modules/kraken2/kraken2/meta.yml
+++ b/modules/kraken2/kraken2/meta.yml
@@ -27,25 +27,40 @@ input:
   - db:
       type: directory
       description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read
 output:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
       type: file
       description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
       type: file
       description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
       type: file
       description: |
         Kraken2 report containing stats about classified
diff --git a/tests/modules/kraken2/kraken2/main.nf b/tests/modules/kraken2/kraken2/main.nf
index 94f4db95..4a3593e4 100644
--- a/tests/modules/kraken2/kraken2/main.nf
+++ b/tests/modules/kraken2/kraken2/main.nf
@@ -12,7 +12,7 @@ workflow test_kraken2_kraken2_single_end {
     db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
 
     UNTAR ( db )
-    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
 }
 
 workflow test_kraken2_kraken2_paired_end {
@@ -23,5 +23,15 @@ workflow test_kraken2_kraken2_paired_end {
     db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
 
     UNTAR ( db )
-    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] } )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, true, false )
+}
+
+workflow test_kraken2_kraken2_classifyreads {
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+    db    = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ]
+
+    UNTAR ( db )
+    KRAKEN2_KRAKEN2 ( input, UNTAR.out.untar.map{ it[1] }, false, true ) // save_output_fastqs = false, save_reads_assignment = true
 }
diff --git a/tests/modules/kraken2/kraken2/test.yml b/tests/modules/kraken2/kraken2/test.yml
index 1ec413bf..af1e6e0d 100644
--- a/tests/modules/kraken2/kraken2/test.yml
+++ b/tests/modules/kraken2/kraken2/test.yml
@@ -1,29 +1,43 @@
-- name: kraken2 kraken2 single-end
-  command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
+- name: kraken2 kraken2 test_kraken2_kraken2_single_end
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_single_end -c tests/config/nextflow.config
   tags:
     - kraken2
     - kraken2/kraken2
   files:
     - path: output/kraken2/test.classified.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.kraken2.report.txt
       md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/test.unclassified.fastq.gz
+    - path: output/kraken2/versions.yml
+      md5sum: 6e3ad947ac8dee841a89216071c181cc
+    - path: output/untar/versions.yml
 
-- name: kraken2 kraken2 paired-end
-  command: nextflow run ./tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/kraken2/kraken2/nextflow.config
+- name: kraken2 kraken2 test_kraken2_kraken2_paired_end
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_paired_end -c tests/config/nextflow.config
   tags:
     - kraken2
     - kraken2/kraken2
   files:
     - path: output/kraken2/test.classified_1.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.classified_2.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified_1.fastq.gz
-      should_exist: true
-    - path: output/kraken2/test.unclassified_2.fastq.gz
-      should_exist: true
     - path: output/kraken2/test.kraken2.report.txt
       md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/test.unclassified_1.fastq.gz
+    - path: output/kraken2/test.unclassified_2.fastq.gz
+    - path: output/kraken2/versions.yml
+      md5sum: 604482fe7a4519f890fae9c8beb1bd6e
+    - path: output/untar/versions.yml
+
+- name: kraken2 kraken2 test_kraken2_kraken2_classifyreads
+  command: nextflow run tests/modules/kraken2/kraken2 -entry test_kraken2_kraken2_classifyreads -c tests/config/nextflow.config
+  tags:
+    - kraken2
+    - kraken2/kraken2
+  files:
+    - path: output/kraken2/test.kraken2.classifiedreads.txt
+      md5sum: e7a90531f0d8d777316515c36fe4cae0
+    - path: output/kraken2/test.kraken2.report.txt
+      md5sum: 4227755fe40478b8d7dc8634b489761e
+    - path: output/kraken2/versions.yml
+      md5sum: 3488c304259e83c5bea573403293fce9
+    - path: output/untar/versions.yml

From f4c69bc4270186ca5ec174833dbe26cc0e1860fc Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Thu, 21 Apr 2022 15:39:22 +0200
Subject: [PATCH 02/10] Module/bclconvert (#1485)

* bclconvert: initial commit

* add most of tool

* attempt at adding testing stub

* add dockerfile + instructions

* add container to module

* update readme

* more attempts at making stubs work

* finish stub run

* fix ci issues

* more fixes to stub

* add read version check to stub

* fix some tests

* update readme

* fix version number

* syntax fix

* revert edit to output directory

* Update modules/bclconvert/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/bclconvert/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* update meta.yaml

* update thread usage

* Update modules/bclconvert/main.nf

Co-authored-by: Edmund Miller <edmund.a.miller@protonmail.com>

* Escape env variable

* Update modules/bclconvert/Dockerfile

Co-authored-by: Mark Whelan <7407040+MrMarkW@users.noreply.github.com>

* fix comments by @Emiller88

* fix task.cpus

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
Co-authored-by: Edmund Miller <edmund.a.miller@protonmail.com>
Co-authored-by: Mark Whelan <7407040+MrMarkW@users.noreply.github.com>
---
 modules/bclconvert/.gitignore            |  2 +
 modules/bclconvert/Dockerfile            | 15 +++++
 modules/bclconvert/LICENSE               | 30 +++++++++
 modules/bclconvert/README.md             | 17 +++++
 modules/bclconvert/main.nf               | 81 ++++++++++++++++++++++++
 modules/bclconvert/meta.yml              | 45 +++++++++++++
 tests/config/pytest_modules.yml          |  4 ++
 tests/modules/bclconvert/main.nf         | 22 +++++++
 tests/modules/bclconvert/nextflow.config |  5 ++
 tests/modules/bclconvert/test.yml        | 52 +++++++++++++++
 10 files changed, 273 insertions(+)
 create mode 100644 modules/bclconvert/.gitignore
 create mode 100644 modules/bclconvert/Dockerfile
 create mode 100644 modules/bclconvert/LICENSE
 create mode 100644 modules/bclconvert/README.md
 create mode 100644 modules/bclconvert/main.nf
 create mode 100644 modules/bclconvert/meta.yml
 create mode 100644 tests/modules/bclconvert/main.nf
 create mode 100644 tests/modules/bclconvert/nextflow.config
 create mode 100644 tests/modules/bclconvert/test.yml

diff --git a/modules/bclconvert/.gitignore b/modules/bclconvert/.gitignore
new file mode 100644
index 00000000..45b0ea3a
--- /dev/null
+++ b/modules/bclconvert/.gitignore
@@ -0,0 +1,2 @@
+bcl-convert
+*.rpm
diff --git a/modules/bclconvert/Dockerfile b/modules/bclconvert/Dockerfile
new file mode 100644
index 00000000..df3e1d0f
--- /dev/null
+++ b/modules/bclconvert/Dockerfile
@@ -0,0 +1,15 @@
+# Dockerfile to create a container with bcl-convert
+# Push to nfcore/bclconvert:<VER>
+
+FROM debian:bullseye-slim
+LABEL authors="Matthias De Smet <matthias.desmet@ugent.be>" \
+    description="Docker image containing bcl-convert"
+# Disclaimer: this container is neither provided nor supported by Illumina
+# The 'ps' command is needed by some Nextflow executions to collect system stats
+# Install procps and clean the apt cache
+RUN apt-get update \
+    && apt-get install -y \
+    procps \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/*
+COPY bcl-convert /usr/local/bin/bcl-convert
+RUN chmod +x /usr/local/bin/bcl-convert
diff --git a/modules/bclconvert/LICENSE b/modules/bclconvert/LICENSE
new file mode 100644
index 00000000..6f523227
--- /dev/null
+++ b/modules/bclconvert/LICENSE
@@ -0,0 +1,30 @@
+ILLUMINA END-USER SOFTWARE LICENSE AGREEMENT
+
+IMPORTANT-READ CAREFULLY. THIS IS A LICENSE AGREEMENT THAT YOU ARE REQUIRED TO ACCEPT BEFORE, DOWNLOADING, INSTALLING AND USING ANY SOFTWARE MADE AVAILABLE FROM THE ILLUMINA SUPPORT CENTER (https://support.illumina.com).
+
+CAREFULLY READ ALL THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT BEFORE PROCEEDING WITH DOWNLOADING, INSTALLING, AND/OR USING THE SOFTWARE. YOU ARE NOT PERMITTED TO DOWNLOAD, INSTALL, AND/OR USE THE SOFTWARE UNTIL YOU HAVE AGREED TO BE BOUND BY ALL OF THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT.  YOU REPRESENT AND WARRANT THAT YOU ARE DULY AUTHORIZED TO ACCEPT THE TERMS AND CONDITIONS OF THIS LICENSE AGREEMENT ON BEHALF OF YOUR EMPLOYER.
+
+Software made available through the Illumina Support Center is licensed, not sold, to you. Your license to each software program made available through the Illumina Support Center is subject to your prior acceptance of either this Illumina End-User Software License Agreement (“Agreement”), or a custom end user license agreement (“Custom EULA”), if one is provided with the software.  Any software that is subject to this Agreement is referred to herein as the “Software.” By accepting this Agreement, you agree the terms and conditions of this Agreement will apply to and govern any and all of your downloads, installations, and uses of each Illumina software program made available through the Illumina Support Center, except that your download, installation, and use of any software provided with a Custom EULA will be governed by the terms and conditions of the Custom EULA.
+
+This Agreement is made and entered into by and between Illumina, Inc., a Delaware corporation, having offices at 5200 Illumina Way, San Diego, CA 92122 (“Illumina”) and you as the end-user of the Software (hereinafter, “Licensee” or “you”).  All software, firmware, and associated media, printed materials, and online and electronic documentation, including any updates or upgrades thereof, made available through the Illumina Support Center (collectively, “Software”) provided to Licensee are for use solely by Licensee and the provisions herein WILL apply with respect to such Software.
+
+License Grant.  Subject to the terms and conditions of this Agreement, Illumina grants to Licensee, under the following terms and conditions, a personal, non-exclusive, revocable, non-transferable, non-sublicensable license, for its internal end-use purposes only, in the ordinary course of Licensee’s business to use the Software in executable object code form only, solely at the Licensee’s facility to, install and use the Software on a single computer accessible only by Licensee (and not on any public network or server), where the single computer is owned, leased, or otherwise substantially controlled by Licensee, for the purpose of processing and analyzing data generated from an Illumina genetic sequencing instrument owned and operated solely by Licensee (the “Product”). In the case of Software provided by Illumina in non-compiled form, Illumina grants Licensee a personal, non-exclusive,  non-sublicenseable, restricted right to compile, install, and use one copy of the Software solely for processing and analyzing data generated from the Product.
+License Restrictions. Except as expressly permitted in Section 1, Licensee may not make, have made, import, use, copy, reproduce, distribute, display, publish, sell, re-sell, lease, or sub-license the Software, in whole or in part, except as expressly provided for in this Agreement. Licensee may not modify, improve, translate, reverse engineer, decompile, disassemble, or create derivative works of the Software or otherwise attempt to (a) defeat, avoid, by-pass, remove, deactivate, or otherwise circumvent any software protection mechanisms in the Software including, without limitation, any such mechanism used to restrict or control the functionality of the Software, or (b) derive the source code or the underlying ideas, algorithms, structure, or organization form of the Software.  Licensee will not allow, at any time, including during and after the term of the license, the Software or any portions or copies thereof in any form to become available to any third parties.  Licensee may use the Software solely with genomic data that is generated using the Product; Licensee may not use the Software with any data generated from other products or instruments.  Licensee may not use the Software to perform any data analysis services for any third party.
+Ownership. The Software is protected by United States and international intellectual property laws. All right, title, and interest in and to the Software (including associated intellectual property rights) are and will remain vested in Illumina or Illumina’s affiliated companies or licensors. Licensee acknowledges that no rights, license or interest to any Illumina trademarks are granted hereunder. Licensee acknowledges that unauthorized reproduction or distribution of the Software, or any portion of it, may result in severe civil and criminal penalties.  Illumina reserves all rights in and to the Software not expressly granted to Licensee under this Agreement.
+Upgrades/Updates. Illumina may, at its sole discretion, provide updates or upgrades to the Software. In that case, Licensee WILL have the same rights and obligations under such updates or upgrades as it has for the versions of the Software initially provided to Licensee hereunder.  Licensee recognizes that Illumina is not obligated to provide any upgrades or updates to, or support for, the Software.
+Data Integrity/Loss. Licensee is responsible for the integrity and availability, including preventing the loss of data that Licensee generates, uses, analyzes, manages, or stores in connection with or through its use of the Software, including without limitation, investigating and implementing industry appropriate policies and procedures regarding the provision of access to Licensee’s data, monitoring access and use of Licensee’s data, conducting routine backups and archiving of Licensee’s data, and ensuring the adequacy of anti-virus software. Accordingly, Licensee agrees that Illumina is not responsible for any inability to access, loss or corruption of data as a result of Licensee’s use of the Software, and Illumina has no liability to Licensee in connection with such inability to access, loss or corruption of data.
+Term of License. This Agreement will be in effect from the time Licensee expressly accepts the terms and conditions of this license, or otherwise installs the Software, thereby accepting the terms and conditions contained herein, and will remain in effect until terminated. This license will otherwise terminate upon the conditions set forth in this Agreement, if revoked by Illumina, or if Licensee fails to comply with any term or condition of this Agreement including failure to pay any applicable license fee. Licensee agrees upon termination of this Agreement for any reason to immediately discontinue use of and un-install the Software and destroy all copies of the Software in its possession and/or under its control, and return or destroy, at Illumina’s option, any compact disks, floppy disks or other media provided by Illumina storing the Software thereon (together with any authorized copies thereof), as well as any documentation associated therewith
+Limited Warranty. Illumina warrants that, for a period of 6 months from the date of download or installation of the Software by Licensee, the Software will perform in all material respects in accordance with the accompanying documentation available on the Illumina Support Center. EXCEPT AND TO THE EXTENT EXPRESSLY PROVIDED IN THE FOREGOING, AND TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, THE SOFTWARE IS PROVIDED “AS IS” AND ILLUMINA EXPRESSLY DISCLAIMS ALL WARRANTIES AND CONDITIONS REGARDING THE SOFTWARE AND RESULTS GENERATED BY THE SOFTWARE, INCLUDING WITHOUT LIMITATION, TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, ALL OTHER EXPRESS OR IMPLIED WARRANTIES OR CONDITIONS OF MERCHANTABLE QUALITY, NON-INFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE, AND THOSE ARISING BY STATUTE OR OTHERWISE IN LAW OR FROM A COURSE OF DEALING OR USAGE OF TRADE.  ILLUMINA DOES NOT WARRANT THAT THE FUNCTIONS CONTAINED IN THE SOFTWARE WILL MEET LICENSEE"S REQUIREMENTS, OR THAT THE OPERATION OF THE SOFTWARE WILL BE ERROR FREE OR UNINTERRUPTED.
+Limitation of Liability.
+(a) ILLUMINA’S ENTIRE LIABILITY AND LICENSEE"S EXCLUSIVE REMEDY UNDER THE LIMITED WARRANTY PROVISION  OF SECTION 7 ABOVE WILL BE, AT ILLUMINA’S OPTION, EITHER (i) RETURN OF THE PRICE PAID FOR THE SOFTWARE, OR (ii) REPAIR OR REPLACEMENT OF THE PORTIONS OF THE SOFTWARE THAT DO NOT COMPLY WITH ILLUMINA’S LIMITED WARRANTY. THIS LIMITED WARRANTY IS VOID AND ILLUMINA WILL HAVE NO LIABILITY AT ALL IF FAILURE OF THE SOFTWARE TO COMPLY WITH ILLUMINA LIMITED WARRANTY HAS RESULTED FROM: (w) FAILURE TO USE THE SOFTWARE IN ACCORDANCE WITH ILLUMINA’S THEN CURRENT USER MANUAL OR THIS AGREEMENT; (x) ACCIDENT, ABUSE, OR MISAPPLICATION; (y) PRODUCTS OR EQUIPMENT NOT SPECIFIED BY ILLUMINA AS BEING COMPATIBLE WITH THE SOFTWARE; OR (z) IF LICENSEE HAS NOT NOTIFIED ILLUMINA IN WRITING OF THE DEFECT WITHIN THE ABOVE WARRANTY PERIOD.
+
+(b) TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL ILLUMINA BE LIABLE UNDER ANY THEORY OF CONTRACT, TORT, STRICT LIABILITY OR OTHER LEGAL OR EQUITABLE THEORY FOR ANY PERSONAL INJURY OR ANY INDIRECT, CONSEQUENTIAL, OR INCIDENTAL DAMAGES, EVEN IF ILLUMINA HAS BEEN ADVISED OF THE POSSIBILITY THEREOF INCLUDING, WITHOUT LIMITATION, LOST PROFITS, LOST DATA, INTERRUPTION OF BUSINESS, LOST BUSINESS REVENUE, OTHER ECONOMIC LOSS, OR ANY LOSS OF RECORDED DATA ARISING OUT OF THE USE OF OR INABILITY TO USE THE SOFTWARE. EXCEPT AND TO THE EXTENT EXPRESSLY PROVIDED IN SECTION 7 AND 8(a) ABOVE OR AS OTHERWISE PERMITTED BY LAW, IN NO EVENT WILL ILLUMINA’S TOTAL LIABILITY TO LICENSEE FOR ALL DAMAGES (OTHER THAN AS MAY BE REQUIRED BY APPLICABLE LAW IN CASES INVOLVING PERSONAL INJURY) EXCEED THE AMOUNT OF $500 USD.  THE FOREGOING LIMITATIONS WILL APPLY EVEN IF THE ABOVE STATED REMEDY FAILS OF ITS ESSENTIAL PURPOSE.
+
+Survival. The limitations of liability and ownership rights of Illumina contained herein and Licensee’s obligations following termination of this Agreement WILL survive the termination of this Agreement for any reason.
+Research Use Only. The Software is labeled with a For Research Use Only or similar labeling statement and the performance characteristics of the Software have not been established and the Software is not for use in diagnostic procedures.  Licensee acknowledges and agrees that (i) the Software has not been approved, cleared, or licensed by the United States Food and Drug Administration or any other regulatory entity whether foreign or domestic for any specific intended use, whether research, commercial, diagnostic, or otherwise, and (ii) Licensee must ensure it has any regulatory approvals that are necessary for Licensee’s intended uses of the Software.  Licensee will comply with all applicable laws and regulations when using and maintaining the Software.
+General. Licensee may not sublicense, assign, share, pledge, rent or transfer any of its rights under this Agreement in relation to the Software or any portion thereof including documentation. Illumina reserves the right to change this Agreement at any time. When Illumina makes any changes, Illumina will provide the updated Agreement, or a link to it, on Illumina’s website (www.illumina.com) and such updated Agreement WILL become effective immediately. Licensee’s continued access to or use of the Software represents Licensee’s agreement to any revised Agreement. If one or more provisions of this Agreement are found to be invalid or unenforceable, this Agreement WILL not be rendered inoperative but the remaining provisions WILL continue in full force and effect.  This Agreement constitutes the entire agreement between the parties with respect to the subject matter of this Agreement and merges all prior communications except that a “hard-copy” form of licensing agreement relating to the Software previously agreed to in writing by Illumina and Licensee WILL supersede and govern in the event of any conflicting provisions.
+Governing Law. This Agreement WILL be governed by and construed in accordance with the laws of the state of California, USA, without regard to its conflicts of laws principles, and independent of where a suit or action hereunder may be filed.
+U.S. Government End Users.  If Licensee is a branch agency or instrumentality of the United States Government, the following provision applies.  The Software is a “commercial item” as that term is defined at 48 C.F.R. 2.101, consisting of “commercial computer software” and “commercial computer software documentation,” as such terms are used in 48 C.F.R. 12.212 or 48 C.F.R. 227.7202 (as applicable).  Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4, all United States Government end users acquire the Software with only those rights set forth herein.
+Contact. Any questions regarding legal rights, duties, obligations, or restrictions associated with the software hereunder should be directed to Illumina, Inc., 5200 Illumina Way, San Diego, CA 92122, Attention: Legal Department, Phone: (858) 202-4500, Fax: (858) 202-4599, web site: www.illumina.com <http://www.illumina.com>.
+Third Party Components. The Software may include third party software (“Third Party Programs”). Some of the Third Party Programs are available under open source or free software licenses. The License Agreement accompanying the Licensed Software does not alter any rights or obligations Licensee may have under those open source or free software licenses.  The licenses that govern the terms and conditions of use of the Third Party Programs included in the Licensed Software are provided in the READ ME provided with the Software.  The READ ME also contains copyright statements for the various open source software components (or portions thereof) that are distributed with the Licensed Software.
+END OF END-USER SOFTWARE LICENSE AGREEMENT.
diff --git a/modules/bclconvert/README.md b/modules/bclconvert/README.md
new file mode 100644
index 00000000..4f8538d0
--- /dev/null
+++ b/modules/bclconvert/README.md
@@ -0,0 +1,17 @@
+# Updating the docker container and making a new module release
+
+bcl-convert is a commercial tool from Illumina. The container used by the bcl-convert nf-core module is neither provided nor supported by Illumina. Updating the bcl-convert version in the container and pushing the update to Dockerhub needs to be done manually.
+
+1. Navigate to the [BCL Convert download page](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/downloads.html) and download the rpm of the desired bcl-convert version with `curl` or `wget`.
+2. Unpack the RPM package using `rpm2cpio bcl-convert-*.rpm | cpio -i --make-directories`. Place the executable located in `<unpack_dir>/usr/bin/bcl-convert` in the same folder where the Dockerfile lies.
+3. Create and test the container:
+
+   ```bash
+   docker build . -t nfcore/bclconvert:<VERSION>
+   ```
+
+4. Push the container to the Dockerhub nfcore organization. Access rights are needed for this, so please ask a core team member to do it.
+
+   ```bash
+   docker push nfcore/bclconvert:<VERSION>
+   ```
diff --git a/modules/bclconvert/main.nf b/modules/bclconvert/main.nf
new file mode 100644
index 00000000..e6925b50
--- /dev/null
+++ b/modules/bclconvert/main.nf
@@ -0,0 +1,81 @@
+process BCLCONVERT {
+    tag "$samplesheet" // double quotes so the samplesheet name is interpolated into the tag
+    label 'process_high'
+
+    if (params.enable_conda) {
+        exit 1, "Conda environments cannot be used when using bcl-convert. Please use docker or singularity containers."
+    }
+    container "nfcore/bclconvert:3.9.3" // manually built image, see README.md next to this module
+
+    input:
+    path samplesheet
+    path run_dir
+
+    output:
+    path "*.fastq.gz"               ,emit: fastq
+    path "Reports/*.{csv,xml,bin}"  ,emit: reports
+    path "Logs/*.{log,txt}"         ,emit: logs
+    path "InterOp/*.bin"            ,emit: interop // .bin files copied from the run dir plus those moved out of Reports/
+    path "versions.yml"             ,emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    bcl-convert \\
+        $args \\
+        --output-directory . \\
+        --bcl-input-directory ${run_dir} \\
+        --sample-sheet ${samplesheet} \\
+        --bcl-num-parallel-tiles ${task.cpus}
+
+    mkdir InterOp
+    cp ${run_dir}/InterOp/*.bin InterOp/
+    mv Reports/*.bin InterOp/
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    echo "sample1_S1_L001_R1_001" > sample1_S1_L001_R1_001.fastq.gz
+    echo "sample1_S1_L001_R2_001" > sample1_S1_L001_R2_001.fastq.gz
+    echo "sample1_S1_L002_R1_001" > sample1_S1_L002_R1_001.fastq.gz
+    echo "sample1_S1_L002_R2_001" > sample1_S1_L002_R2_001.fastq.gz
+    echo "sample2_S2_L001_R1_001" > sample2_S2_L001_R1_001.fastq.gz
+    echo "sample2_S2_L001_R2_001" > sample2_S2_L001_R2_001.fastq.gz
+    echo "sample2_S2_L002_R1_001" > sample2_S2_L002_R1_001.fastq.gz
+    echo "sample2_S2_L002_R2_001" > sample2_S2_L002_R2_001.fastq.gz
+
+    mkdir Reports
+    echo "Adapter_Metrics" >  Reports/Adapter_Metrics.csv
+    echo "Demultiplex_Stats" >  Reports/Demultiplex_Stats.csv
+    echo "fastq_list" >  Reports/fastq_list.csv
+    echo "Index_Hopping_Counts" >  Reports/Index_Hopping_Counts.csv
+    echo "IndexMetricsOut" >  Reports/IndexMetricsOut.bin
+    echo "Quality_Metrics" >  Reports/Quality_Metrics.csv
+    echo "RunInfo" >  Reports/RunInfo.xml
+    echo "SampleSheet" >  Reports/SampleSheet.csv
+    echo "Top_Unknown_Barcodes" >  Reports/Top_Unknown_Barcodes.csv
+
+    mkdir Logs
+    echo "Errors" > Logs/Errors.log
+    echo "FastqComplete" > Logs/FastqComplete.txt
+    echo "Info" > Logs/Info.log
+    echo "Warnings" > Logs/Warnings.log
+
+    mkdir InterOp/
+    echo "InterOp" > InterOp/InterOp.bin
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/bclconvert/meta.yml b/modules/bclconvert/meta.yml
new file mode 100644
index 00000000..5c59a978
--- /dev/null
+++ b/modules/bclconvert/meta.yml
@@ -0,0 +1,45 @@
+name: "bclconvert"
+description: Demultiplex Illumina BCL files
+keywords:
+  - demultiplex
+  - illumina
+  - fastq
+tools:
+  - "bclconvert":
+      description: "Demultiplex Illumina BCL files"
+      homepage: "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html"
+      documentation: "https://support-docs.illumina.com/SW/BCL_Convert/Content/SW/FrontPages/BCL_Convert.htm"
+      licence: "ILLUMINA"
+
+input:
+  - samplesheet:
+      type: file
+      description: "Input samplesheet"
+      pattern: "*.{csv}"
+  - run_dir:
+      type: directory
+      description: "Input run directory containing RunInfo.xml and BCL data"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fastq:
+      type: file
+      description: Demultiplexed FASTQ files
+      pattern: "*.{fastq.gz}"
+  - reports:
+      type: file
+      description: Demultiplexing Reports
+      pattern: "Reports/*.{csv,xml}"
+  - logs:
+      type: file
+      description: Log files
+      pattern: "Logs/*.{log,txt}"
+  - interop:
+      type: file
+      description: InterOp binary files (copied from the run directory and moved out of Reports/)
+      pattern: "InterOp/*.{bin}"  # case-sensitive: the module writes to "InterOp/"
+authors:
+  - "@matthdsm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 9a128bd4..786f87db 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -174,6 +174,10 @@ bcftools/view:
   - modules/bcftools/view/**
   - tests/modules/bcftools/view/**
 
+bclconvert:
+  - modules/bclconvert/**
+  - tests/modules/bclconvert/**
+
 bedtools/bamtobed:
   - modules/bedtools/bamtobed/**
   - tests/modules/bedtools/bamtobed/**
diff --git a/tests/modules/bclconvert/main.nf b/tests/modules/bclconvert/main.nf
new file mode 100644
index 00000000..e8a78e4f
--- /dev/null
+++ b/tests/modules/bclconvert/main.nf
@@ -0,0 +1,22 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { BCLCONVERT } from '../../../modules/bclconvert/main.nf'
+
+process STUB_BCLCONVERT_INPUT { // Fabricates placeholder inputs for BCLCONVERT; only a stub: section is defined and the test runs with -stub-run
+    output:
+    path "SampleSheet.csv"          ,emit: samplesheet // one-line dummy samplesheet
+    path "DDMMYY_SERIAL_FLOWCELL"   ,emit: run_dir // empty directory standing in for a run folder
+
+    stub:
+    """
+    mkdir DDMMYY_SERIAL_FLOWCELL
+    echo "SampleSheet" > SampleSheet.csv
+    """
+}
+
+workflow test_bclconvert {
+    STUB_BCLCONVERT_INPUT ()
+    BCLCONVERT (STUB_BCLCONVERT_INPUT.out.samplesheet, STUB_BCLCONVERT_INPUT.out.run_dir)
+}
diff --git a/tests/modules/bclconvert/nextflow.config b/tests/modules/bclconvert/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/bclconvert/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    // Publish into <outdir>/<name>, where <name> is the last ':'-separated part of the process name, cut at its first '_' and lowercased
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/bclconvert/test.yml b/tests/modules/bclconvert/test.yml
new file mode 100644
index 00000000..65e71a59
--- /dev/null
+++ b/tests/modules/bclconvert/test.yml
@@ -0,0 +1,52 @@
+- name: bclconvert test_bclconvert
+  command: nextflow run tests/modules/bclconvert -entry test_bclconvert -c tests/config/nextflow.config -stub-run
+  tags:
+    - bclconvert
+  files:
+    - path: output/bclconvert/InterOp/InterOp.bin
+      md5sum: d3dea0bb4ab1c8754af324f40b001481
+    - path: output/bclconvert/Logs/Errors.log
+      md5sum: 334645f09074b2597a692e395b716a9c
+    - path: output/bclconvert/Logs/FastqComplete.txt
+      md5sum: a4c4c6ce2d0de67d3b7ac7d1fcb512e4
+    - path: output/bclconvert/Logs/Info.log
+      md5sum: d238822d379f2277cac950ca986cb660
+    - path: output/bclconvert/Logs/Warnings.log
+      md5sum: aeefd2d631817e170f88f25ecaaf4664
+    - path: output/bclconvert/Reports/Adapter_Metrics.csv
+      md5sum: af62e9c7b44940cfd8ea11064a1f42ae
+    - path: output/bclconvert/Reports/Demultiplex_Stats.csv
+      md5sum: d11313931fcaabb5ce159462ad3dd1da
+    - path: output/bclconvert/Reports/IndexMetricsOut.bin
+      md5sum: 6bcee11c8145e3b1059ceaa91d2f5be7
+    - path: output/bclconvert/Reports/Index_Hopping_Counts.csv
+      md5sum: 697e40e0c0d48b4bd25f138ef60b0bde
+    - path: output/bclconvert/Reports/Quality_Metrics.csv
+      md5sum: 3902fd38f6b01f1ce0f0e8724238f8f2
+    - path: output/bclconvert/Reports/RunInfo.xml
+      md5sum: 5bef7c7e76360231b0c4afdfc915fd44
+    - path: output/bclconvert/Reports/SampleSheet.csv
+      md5sum: c579e7d2c9c917c4cfb875a0373c0936
+    - path: output/bclconvert/Reports/Top_Unknown_Barcodes.csv
+      md5sum: 39a5e7f6d21c12d6051afdc8261b6330
+    - path: output/bclconvert/Reports/fastq_list.csv
+      md5sum: 32c51ab10e013fd547928de57361ffcb
+    - path: output/bclconvert/sample1_S1_L001_R1_001.fastq.gz
+      md5sum: 9b831a39755935333f86f167527a094d
+    - path: output/bclconvert/sample1_S1_L001_R2_001.fastq.gz
+      md5sum: 082f4f767b7619f409ca7e752ef482bf
+    - path: output/bclconvert/sample1_S1_L002_R1_001.fastq.gz
+      md5sum: 837764c89db93dfb53cd663c4f26f3d7
+    - path: output/bclconvert/sample1_S1_L002_R2_001.fastq.gz
+      md5sum: 1a42cf6ba0bb8fc7770f278e6d1ab676
+    - path: output/bclconvert/sample2_S2_L001_R1_001.fastq.gz
+      md5sum: 475bc426b7cc48d0551d40e31457dc78
+    - path: output/bclconvert/sample2_S2_L001_R2_001.fastq.gz
+      md5sum: f670ccd7d9352e0e67fe1c1232429d94
+    - path: output/bclconvert/sample2_S2_L002_R1_001.fastq.gz
+      md5sum: ebd5ff6fa5603e7d704b5a10598de58c
+    - path: output/bclconvert/sample2_S2_L002_R2_001.fastq.gz
+      md5sum: 2f83b460f52620d2548c7ef8845b31d7
+    - path: output/stub/SampleSheet.csv
+      md5sum: c579e7d2c9c917c4cfb875a0373c0936
+    - path: output/bclconvert/versions.yml

From 12afb6b0faf3cabf769c9a2a7dd477e3f066eac0 Mon Sep 17 00:00:00 2001
From: Lucpen <lucia.pena.perez@scilifelab.se>
Date: Fri, 22 Apr 2022 10:01:47 +0200
Subject: [PATCH 03/10] Update samtools view to add input path(index) (#1539)

* feat added index as input, to allow module to be used for subsampling

* fix test

* feat added index to meta.yml

* Update modules/samtools/view/meta.yml

feat corrected description of idea pattern file in meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/samtools/view/main.nf       | 2 +-
 modules/samtools/view/meta.yml      | 4 ++++
 tests/modules/samtools/view/main.nf | 7 ++++---
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/modules/samtools/view/main.nf b/modules/samtools/view/main.nf
index 5f14fbbf..11cfb74b 100644
--- a/modules/samtools/view/main.nf
+++ b/modules/samtools/view/main.nf
@@ -8,7 +8,7 @@ process SAMTOOLS_VIEW {
         'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
 
     input:
-    tuple val(meta), path(input)
+    tuple val(meta), path(input), path(index)
     path fasta
 
     output:
diff --git a/modules/samtools/view/meta.yml b/modules/samtools/view/meta.yml
index 5604bfa7..a8b43ecc 100644
--- a/modules/samtools/view/meta.yml
+++ b/modules/samtools/view/meta.yml
@@ -25,6 +25,10 @@ input:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
+  - index:
+      type: optional file
+      description: Optional BAM/CRAM index file (BAI or CRAI); pass [] when no index is available
+      pattern: "*.{bai,crai}"
   - fasta:
       type: optional file
       description: Reference file the CRAM was created with
diff --git a/tests/modules/samtools/view/main.nf b/tests/modules/samtools/view/main.nf
index 8ee27ef8..9c239066 100644
--- a/tests/modules/samtools/view/main.nf
+++ b/tests/modules/samtools/view/main.nf
@@ -6,7 +6,8 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf'
 
 workflow test_samtools_view {
     input = [ [ id:'test', single_end:false ], // meta map
-                file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
+                file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+                []
             ]
 
     SAMTOOLS_VIEW ( input, [] )
@@ -14,8 +15,8 @@ workflow test_samtools_view {
 
 workflow test_samtools_view_cram {
    input = [ [ id: 'test' ], // meta map
-               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
-               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
             ]
     fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
 

From c7329a3a5730872dea512921217993cfa7acd959 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 22 Apr 2022 10:10:43 +0200
Subject: [PATCH 04/10] add stubs for stranger (#1543)

---
 modules/stranger/main.nf        | 11 +++++++++++
 tests/modules/stranger/main.nf  |  5 +++++
 tests/modules/stranger/test.yml | 10 ++++++++++
 3 files changed, 26 insertions(+)

diff --git a/modules/stranger/main.nf b/modules/stranger/main.nf
index 55678bd3..ddfa0070 100644
--- a/modules/stranger/main.nf
+++ b/modules/stranger/main.nf
@@ -33,4 +33,15 @@ process STRANGER {
         stranger: \$( stranger --version )
     END_VERSIONS
     """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        stranger: \$( stranger --version )
+    END_VERSIONS
+    """
 }
diff --git a/tests/modules/stranger/main.nf b/tests/modules/stranger/main.nf
index 5bd6766b..4a930c19 100644
--- a/tests/modules/stranger/main.nf
+++ b/tests/modules/stranger/main.nf
@@ -23,3 +23,8 @@ workflow test_stranger_without_optional_variant_catalog {
     EXPANSIONHUNTER ( input, fasta, variant_catalog )
     STRANGER ( EXPANSIONHUNTER.out.vcf, [] )
 }
+
+workflow test_stranger_without_optional_variant_catalog_stubs { // per its name, meant to be run with -stub-run
+    EXPANSIONHUNTER ( input, fasta, variant_catalog ) // NOTE(review): input/fasta/variant_catalog are not assigned in this body - confirm they resolve when this workflow is the -entry
+    STRANGER ( EXPANSIONHUNTER.out.vcf, [] ) // empty list passed in place of the optional variant catalog
+}
diff --git a/tests/modules/stranger/test.yml b/tests/modules/stranger/test.yml
index c7a6972e..bf922c86 100644
--- a/tests/modules/stranger/test.yml
+++ b/tests/modules/stranger/test.yml
@@ -25,3 +25,13 @@
       md5sum: bbe15159195681d5c18596d3ad85c78f
     - path: output/stranger/versions.yml
       md5sum: 8558542a007e90ea5dcdceed3f12585d
+
+- name: stranger test_stranger_without_optional_variant_catalog_stubs  # NOTE(review): the command below uses -entry test_stranger_without_optional_variant_catalog (no _stubs suffix) together with -stub-run - confirm this is intended
+  command: nextflow run tests/modules/stranger -entry test_stranger_without_optional_variant_catalog -c tests/config/nextflow.config -stub-run
+  tags:
+    - stranger
+  files:  # existence checks only; no md5sum is asserted for these outputs
+    - path: output/expansionhunter/test.vcf
+    - path: output/expansionhunter/versions.yml
+    - path: output/stranger/test.vcf.gz
+    - path: output/stranger/versions.yml

From 35231d394940dca2291ac2321c8f9b2e3b039905 Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 22 Apr 2022 10:13:57 +0200
Subject: [PATCH 05/10] update picard/collecthsmetrics (#1542)

* update picard/collecthsmetrics

* syntax fixes, bugfixes

* add tests

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 modules/picard/collecthsmetrics/main.nf        | 9 +++++----
 modules/picard/collecthsmetrics/meta.yml       | 7 ++++---
 tests/modules/picard/collecthsmetrics/main.nf  | 2 +-
 tests/modules/picard/collecthsmetrics/test.yml | 2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/modules/picard/collecthsmetrics/main.nf b/modules/picard/collecthsmetrics/main.nf
index 3acf8bb8..ef7a9b9f 100644
--- a/modules/picard/collecthsmetrics/main.nf
+++ b/modules/picard/collecthsmetrics/main.nf
@@ -15,8 +15,8 @@ process PICARD_COLLECTHSMETRICS {
     path target_intervals
 
     output:
-    tuple val(meta), path("*collecthsmetrics.txt"), emit: hs_metrics
-    path "versions.yml"                           , emit: versions
+    tuple val(meta), path("*_metrics")  , emit: metrics
+    path "versions.yml"                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -41,7 +41,8 @@ process PICARD_COLLECTHSMETRICS {
         -BAIT_INTERVALS $bait_intervals \\
         -TARGET_INTERVALS $target_intervals \\
         -INPUT $bam \\
-        -OUTPUT ${prefix}_collecthsmetrics.txt
+        -OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics
+
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -52,7 +53,7 @@ process PICARD_COLLECTHSMETRICS {
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}_collecthsmetrics.txt
+    touch ${prefix}.CollectHsMetrics.coverage_metrics
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/picard/collecthsmetrics/meta.yml b/modules/picard/collecthsmetrics/meta.yml
index 4b94909f..dc9d647a 100644
--- a/modules/picard/collecthsmetrics/meta.yml
+++ b/modules/picard/collecthsmetrics/meta.yml
@@ -57,10 +57,11 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-  - hs_metrics:
+  - metrics:
       type: file
-      description: The metrics file.
-      pattern: "*_collecthsmetrics.txt"
+      description: Alignment metrics files generated by picard
+      pattern: "*_{metrics}"
 
 authors:
   - "@projectoriented"
+  - "@matthdsm"
diff --git a/tests/modules/picard/collecthsmetrics/main.nf b/tests/modules/picard/collecthsmetrics/main.nf
index 2e8727b5..a28eb174 100644
--- a/tests/modules/picard/collecthsmetrics/main.nf
+++ b/tests/modules/picard/collecthsmetrics/main.nf
@@ -7,7 +7,7 @@ include { PICARD_COLLECTHSMETRICS } from '../../../../modules/picard/collecthsme
 workflow test_picard_collecthsmetrics {
 
     input = [ [ id:'test', single_end:false ], // meta map
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) ]
+            file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) ]
 
     fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
     fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
diff --git a/tests/modules/picard/collecthsmetrics/test.yml b/tests/modules/picard/collecthsmetrics/test.yml
index 9232d508..9aa14f15 100644
--- a/tests/modules/picard/collecthsmetrics/test.yml
+++ b/tests/modules/picard/collecthsmetrics/test.yml
@@ -5,4 +5,4 @@
     - picard/collecthsmetrics
   files:
     # The file can't be md5'd consistently
-    - path: output/picard/test_collecthsmetrics.txt
+    - path: output/picard/test.CollectHsMetrics.coverage_metrics

From 90b203d3e915cce7434ed010b8a56a89f4142bdd Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 22 Apr 2022 10:20:05 +0200
Subject: [PATCH 06/10] Tool/elprep split (#1533)

* tool: elprep split

* fixes for testing

* fix tests

* fix test outputs

* create test-yaml

* fix suggestions by @jfy133

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/elprep/split/main.nf               | 44 ++++++++++++++++++++++
 modules/elprep/split/meta.yml              | 43 +++++++++++++++++++++
 tests/config/pytest_modules.yml            |  4 ++
 tests/modules/elprep/split/main.nf         | 15 ++++++++
 tests/modules/elprep/split/nextflow.config |  9 +++++
 tests/modules/elprep/split/test.yml        | 10 +++++
 6 files changed, 125 insertions(+)
 create mode 100644 modules/elprep/split/main.nf
 create mode 100644 modules/elprep/split/meta.yml
 create mode 100644 tests/modules/elprep/split/main.nf
 create mode 100644 tests/modules/elprep/split/nextflow.config
 create mode 100644 tests/modules/elprep/split/test.yml

diff --git a/modules/elprep/split/main.nf b/modules/elprep/split/main.nf
new file mode 100644
index 00000000..8af558d4
--- /dev/null
+++ b/modules/elprep/split/main.nf
@@ -0,0 +1,44 @@
+process ELPREP_SPLIT { // Runs `elprep split` on the staged BAM/SAM input(s) and emits every bam/sam file it produces
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
+        'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
+
+    input:
+    tuple val(meta), path(bam) // bam may be one or several files; all are moved into ./input by the script
+
+    output:
+    tuple val(meta), path("**.{bam,sam}"), emit: bam // "**" matches recursively, so files in subdirectories are collected too
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when // runs unless task.ext.when is set to a falsy value
+
+    script:
+    def args = task.ext.args ?: '' // extra elprep CLI options supplied through task.ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}" // passed to --output-prefix; defaults to meta.id
+    meta.single_end ? args += " --single-end": "" // appends --single-end to args when meta.single_end is truthy
+
+    """
+    # create directory and move all input so elprep can find and merge them before splitting
+    mkdir input
+    mv ${bam} input/
+
+    mkdir ${prefix}
+
+    elprep split \\
+        input \\
+        . \\
+        $args \\
+        --nr-of-threads $task.cpus \\
+        --output-prefix $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/elprep/split/meta.yml b/modules/elprep/split/meta.yml
new file mode 100644
index 00000000..b99562fa
--- /dev/null
+++ b/modules/elprep/split/meta.yml
@@ -0,0 +1,43 @@
+name: "elprep_split"
+description: Split bam file into manageable chunks
+keywords:
+  - bam
+  - split by chromosome
+tools:
+  - "elprep":
+      description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
+      homepage: "https://github.com/ExaScience/elprep"
+      documentation: "https://github.com/ExaScience/elprep"
+      tool_dev_url: "https://github.com/ExaScience/elprep"
+      doi: "10.1371/journal.pone.0244471"
+      licence: "['AGPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: List of BAM/SAM files
+      pattern: "*.{bam,sam}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  #
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: List of split BAM/SAM files
+      pattern: "*.{bam,sam}"
+
+authors:
+  - "@matthdsm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 786f87db..19b51f3d 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -599,6 +599,10 @@ ectyper:
   - modules/ectyper/**
   - tests/modules/ectyper/**
 
+elprep/split:
+  - modules/elprep/split/**
+  - tests/modules/elprep/split/**
+
 emmtyper:
   - modules/emmtyper/**
   - tests/modules/emmtyper/**
diff --git a/tests/modules/elprep/split/main.nf b/tests/modules/elprep/split/main.nf
new file mode 100644
index 00000000..d5a111de
--- /dev/null
+++ b/tests/modules/elprep/split/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ELPREP_SPLIT } from '../../../../modules/elprep/split/main.nf'
+
+workflow test_elprep_split {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+    ]
+
+    ELPREP_SPLIT ( input )
+}
diff --git a/tests/modules/elprep/split/nextflow.config b/tests/modules/elprep/split/nextflow.config
new file mode 100644
index 00000000..a3ae0169
--- /dev/null
+++ b/tests/modules/elprep/split/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName : ELPREP_SPLIT {
+        ext.args = "--contig-group-size 1 --output-type bam"
+    }
+
+}
diff --git a/tests/modules/elprep/split/test.yml b/tests/modules/elprep/split/test.yml
new file mode 100644
index 00000000..7ba139b1
--- /dev/null
+++ b/tests/modules/elprep/split/test.yml
@@ -0,0 +1,10 @@
+- name: elprep split test_elprep_split
+  command: nextflow run tests/modules/elprep/split -entry test_elprep_split -c tests/config/nextflow.config
+  tags:
+    - elprep
+    - elprep/split
+  files:
+    - path: output/elprep/splits/test-group00001.bam
+    - path: output/elprep/splits/test-unmapped.bam
+    - path: output/elprep/test-spread.bam
+    - path: output/elprep/versions.yml

From 9e3daae8ef8cc1e830c9ef8af5336df7065d2823 Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 22 Apr 2022 11:08:03 +0200
Subject: [PATCH 07/10] New module: elprep filter (#1524)

* first commit

* syntax fix

* fix input

* output sam during test for md5sum

* replace md5sum with contains

* add new test data, add extra in/outputs

* cli fixes

* fix outputs

* Update modules/elprep/filter/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/elprep/filter/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/elprep/filter/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* fix suggestions by @jfy133

* Bit more verbose explanation for bool vals

* define variables

* fix prettier

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/elprep/filter/main.nf               |  89 ++++++++++++++++
 modules/elprep/filter/meta.yml              | 106 ++++++++++++++++++++
 tests/config/pytest_modules.yml             |   4 +
 tests/config/test_data.config               |   3 +
 tests/modules/elprep/filter/main.nf         |  18 ++++
 tests/modules/elprep/filter/nextflow.config |   7 ++
 tests/modules/elprep/filter/test.yml        |  13 +++
 7 files changed, 240 insertions(+)
 create mode 100644 modules/elprep/filter/main.nf
 create mode 100644 modules/elprep/filter/meta.yml
 create mode 100644 tests/modules/elprep/filter/main.nf
 create mode 100644 tests/modules/elprep/filter/nextflow.config
 create mode 100644 tests/modules/elprep/filter/test.yml

diff --git a/modules/elprep/filter/main.nf b/modules/elprep/filter/main.nf
new file mode 100644
index 00000000..02c93186
--- /dev/null
+++ b/modules/elprep/filter/main.nf
@@ -0,0 +1,89 @@
+process ELPREP_FILTER { // Runs `elprep filter` on one BAM/SAM; optional flags are assembled from the inputs below
+    tag "$meta.id"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
+        'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+    val(run_haplotypecaller) // truthy adds --haplotypecaller <prefix>.g.vcf.gz
+    val(run_bqsr) // truthy adds --bqsr <prefix>.recall
+    path(reference_sequences) // when set, passed to --replace-reference-sequences
+    path(filter_regions_bed) // when set, passed to --filter-non-overlapping-reads
+    path(reference_elfasta) // when set, passed to --reference
+    path(known_sites_elsites) // when set, passed to --known-sites
+    path(target_regions_bed) // when set, passed to --target-regions
+    path(intermediate_bqsr_tables) // when set, adds --bqsr-apply . (the current directory)
+    val(bqsr_tables_only) // truthy adds --bqsr-tables-only <prefix>.table
+    val(get_activity_profile) // truthy adds --activity-profile <prefix>.activity_profile.igv
+    val(get_assembly_regions) // truthy adds --assembly-regions <prefix>.assembly_regions.igv
+
+
+    output:
+    tuple val(meta), path("**.{bam,sam}")           ,emit: bam // recursive glob; the script writes <prefix>.<suffix>
+    tuple val(meta), path("*.metrics.txt")          ,optional: true, emit: metrics
+    tuple val(meta), path("*.recall")               ,optional: true, emit: recall
+    tuple val(meta), path("*.vcf.gz")               ,optional: true, emit: gvcf
+    tuple val(meta), path("*.table")                ,optional: true, emit: table
+    tuple val(meta), path("*.activity_profile.igv") ,optional: true, emit: activity_profile
+    tuple val(meta), path("*.assembly_regions.igv") ,optional: true, emit: assembly_regions
+    path "versions.yml"                             ,emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when // runs unless task.ext.when is set to a falsy value
+
+    script:
+    def args = task.ext.args ?: '' // extra elprep CLI options supplied through task.ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}" // base name for every output file; defaults to meta.id
+    def suffix = args.contains("--output-type sam") ? "sam" : "bam" // "sam" only when args holds the exact text "--output-type sam"
+
+    // filter args (each *_cmd below is an empty string when its input is falsy)
+    def reference_sequences_cmd = reference_sequences ? " --replace-reference-sequences ${reference_sequences}" : ""
+    def filter_regions_cmd      = filter_regions_bed  ? " --filter-non-overlapping-reads ${filter_regions_bed}" : ""
+
+    // markdup args: the metrics file is requested only when args contains "--mark-duplicates"
+    def markdup_cmd = args.contains("--mark-duplicates") ? " --mark-optical-duplicates ${prefix}.metrics.txt": ""
+
+    // variant calling args
+    def haplotyper_cmd = run_haplotypecaller ? " --haplotypecaller ${prefix}.g.vcf.gz": ""
+
+    def fasta_cmd           = reference_elfasta   ? " --reference ${reference_elfasta}": ""
+    def known_sites_cmd     = known_sites_elsites ? " --known-sites ${known_sites_elsites}": ""
+    def target_regions_cmd  = target_regions_bed  ? " --target-regions ${target_regions_bed}": ""
+
+    // bqsr args
+    def bqsr_cmd = run_bqsr ? " --bqsr ${prefix}.recall": ""
+    def bqsr_tables_only_cmd = bqsr_tables_only ? " --bqsr-tables-only ${prefix}.table": ""
+
+    def intermediate_bqsr_cmd = intermediate_bqsr_tables ? " --bqsr-apply .": "" // points --bqsr-apply at the current directory rather than at the individual table files
+
+    // misc: optional IGV-format outputs
+    def activity_profile_cmd = get_activity_profile ? " --activity-profile ${prefix}.activity_profile.igv": ""
+    def assembly_regions_cmd = get_assembly_regions ? " --assembly-regions ${prefix}.assembly_regions.igv": ""
+
+    """
+    elprep filter ${bam} ${prefix}.${suffix} \\
+        ${reference_sequences_cmd} \\
+        ${filter_regions_cmd} \\
+        ${markdup_cmd} \\
+        ${haplotyper_cmd} \\
+        ${fasta_cmd} \\
+        ${known_sites_cmd} \\
+        ${target_regions_cmd} \\
+        ${bqsr_cmd} \\
+        ${bqsr_tables_only_cmd} \\
+        ${intermediate_bqsr_cmd} \\
+        ${activity_profile_cmd} \\
+        ${assembly_regions_cmd} \\
+        --nr-of-threads ${task.cpus} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/elprep/filter/meta.yml b/modules/elprep/filter/meta.yml
new file mode 100644
index 00000000..d7d41071
--- /dev/null
+++ b/modules/elprep/filter/meta.yml
@@ -0,0 +1,106 @@
+name: "elprep_filter"
+description: "Filter, sort and markdup sam/bam files, with optional BQSR and variant calling."
+keywords:
+  - sort
+  - bam
+  - sam
+  - filter
+  - variant calling
+tools:
+  - "elprep":
+      description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
+      homepage: "https://github.com/ExaScience/elprep"
+      documentation: "https://github.com/ExaScience/elprep"
+      tool_dev_url: "https://github.com/ExaScience/elprep"
+      doi: "10.1371/journal.pone.0244471"
+      licence: "['AGPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Input SAM/BAM file
+      pattern: "*.{bam,sam}"
+  - run_haplotypecaller:
+      type: boolean
+      description: Run variant calling on the input files. Needed to generate gvcf output.
+  - run_bqsr:
+      type: boolean
+      description: Run BQSR on the input files. Needed to generate recall metrics.
+  - reference_sequences:
+      type: file
+      description: Optional SAM header to replace existing header.
+      pattern: "*.sam"
+  - filter_regions_bed:
+      type: file
+      description: Optional BED file containing regions to filter.
+      pattern: "*.bed"
+  - reference_elfasta:
+      type: file
+      description: Elfasta file, required for BQSR and variant calling.
+      pattern: "*.elfasta"
+  - known_sites_elsites:
+      type: file
+      description: Optional elsites file containing known SNPs for BQSR.
+      pattern: "*.elsites"
+  - target_regions_bed:
+      type: file
+      description: Optional BED file containing target regions for BQSR and variant calling.
+      pattern: "*.bed"
+  - intermediate_bqsr_tables:
+      type: file
+      description: Optional list of BQSR tables, used when parsing files created by `elprep split`
+      pattern: "*.table"
+  - bqsr_tables_only:
+      type: boolean
+      description: Write intermediate BQSR tables, used when parsing files created by `elprep split`.
+  - get_activity_profile:
+      type: boolean
+      description: Get the activity profile calculated by the haplotypecaller to the given file in IGV format.
+  - get_assembly_regions:
+      type: boolean
+      description: Write the assembly regions calculated by the haplotypecaller to `<prefix>.assembly_regions.igv` in IGV format.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: Sorted, markdup, optionally BQSR BAM/SAM file
+      pattern: "*.{bam,sam}"
+  - metrics:
+      type: file
+      description: Optional duplicate metrics file generated by elprep
+      pattern: "*.{metrics.txt}"
+  - recall:
+      type: file
+      description: Optional recall metrics file generated by elprep
+      pattern: "*.{recall}"
+  - gvcf:
+      type: file
+      description: Optional GVCF output file
+      pattern: "*.{vcf.gz}"
+  - table:
+      type: file
+      description: Optional intermediate BQSR table output file
+      pattern: "*.{table}"
+  - activity_profile:
+      type: file
+      description: Optional activity profile output file
+      pattern: "*.{activity_profile.igv}"
+  - assembly_regions:
+      type: file
+      description: Optional assembly regions output file
+      pattern: "*.{assembly_regions.igv}"
+authors:
+  - "@matthdsm"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 19b51f3d..c3bf04aa 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -599,6 +599,10 @@ ectyper:
   - modules/ectyper/**
   - tests/modules/ectyper/**
 
+elprep/filter:
+  - modules/elprep/filter/**
+  - tests/modules/elprep/filter/**
+
 elprep/split:
   - modules/elprep/split/**
   - tests/modules/elprep/split/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index ea123732..559c0d6f 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -112,6 +112,7 @@ params {
         }
         'homo_sapiens' {
             'genome' {
+                genome_elfasta                                 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta"
                 genome_fasta                                   = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
                 genome_fasta_fai                               = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
                 genome_dict                                    = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
@@ -123,6 +124,7 @@ params {
                 genome_header                                  = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
                 genome_bed_gz                                  = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
                 genome_bed_gz_tbi                              = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
+                genome_elsites                                 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elsites"
                 transcriptome_fasta                            = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
                 genome2_fasta                                  = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta"
                 genome_chain_gz                                = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz"
@@ -136,6 +138,7 @@ params {
                 genome_21_multi_interval_bed_gz_tbi            = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
                 genome_21_chromosomes_dir                      = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
 
+                dbsnp_146_hg38_elsites                         = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
                 dbsnp_146_hg38_vcf_gz                          = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
                 dbsnp_146_hg38_vcf_gz_tbi                      = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
                 gnomad_r2_1_1_vcf_gz                           = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
diff --git a/tests/modules/elprep/filter/main.nf b/tests/modules/elprep/filter/main.nf
new file mode 100644
index 00000000..0a8d43ca
--- /dev/null
+++ b/tests/modules/elprep/filter/main.nf
@@ -0,0 +1,37 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ELPREP_FILTER } from '../../../../modules/elprep/filter/main.nf'
+
+// Runs ELPREP_FILTER on the paired-end sorted test BAM together with an elfasta
+// reference, dbSNP known sites (elsites) and a target-regions BED file.
+workflow test_elprep_filter {
+
+    // First process input: [ meta map, BAM ]
+    bam_file  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+    bam_tuple = [ [ id:'test', single_end:false ], bam_file ]
+
+    // Reference resources from the homo_sapiens genome test data
+    reference_elfasta   = file(params.test_data['homo_sapiens']['genome']['genome_elfasta'], checkIfExists: true)
+    known_sites_elsites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_elsites'], checkIfExists: true)
+    target_regions_bed  = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+
+    // Arguments are positional, in the order of ELPREP_FILTER's input block.
+    // NOTE(review): what each boolean and [] slot stands for is defined in
+    // modules/elprep/filter/main.nf - check it there before editing this call.
+    ELPREP_FILTER (
+        bam_tuple,
+        true,
+        true,
+        [],
+        [],
+        reference_elfasta,
+        known_sites_elsites,
+        target_regions_bed,
+        [],
+        [],
+        true,
+        true
+    )
+}
diff --git a/tests/modules/elprep/filter/nextflow.config b/tests/modules/elprep/filter/nextflow.config
new file mode 100644
index 00000000..d53a3d2d
--- /dev/null
+++ b/tests/modules/elprep/filter/nextflow.config
@@ -0,0 +1,16 @@
+// Configuration for the elprep/filter module test
+process {
+
+    // Publish into <outdir>/<tool>, where <tool> is the lower-cased text before
+    // the first underscore of the unqualified process name
+    publishDir = {
+        def process_name = task.process.tokenize(':')[-1]
+        def tool_name    = process_name.tokenize('_')[0].toLowerCase()
+        "${params.outdir}/${tool_name}"
+    }
+
+    // ext.args is exposed to the selected process as task.ext.args
+    withName: ELPREP_FILTER {
+        ext.args = "--mark-duplicates "
+    }
+}
diff --git a/tests/modules/elprep/filter/test.yml b/tests/modules/elprep/filter/test.yml
new file mode 100644
index 00000000..5242045b
--- /dev/null
+++ b/tests/modules/elprep/filter/test.yml
@@ -0,0 +1,13 @@
+- name: elprep filter test_elprep_filter  # existence-only test: no md5sum is pinned for any file below
+  command: nextflow run tests/modules/elprep/filter -entry test_elprep_filter -c tests/config/nextflow.config
+  tags:  # elprep/filter matches the key registered in tests/config/pytest_modules.yml
+    - elprep
+    - elprep/filter
+  files:
+    - path: output/elprep/test.activity_profile.igv
+    - path: output/elprep/test.assembly_regions.igv
+    - path: output/elprep/test.bam
+    - path: output/elprep/test.g.vcf.gz
+    - path: output/elprep/test.metrics.txt
+    - path: output/elprep/test.recall
+    - path: output/elprep/versions.yml

From 538dbac98ba9c8f799536cd5a617195501439457 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli <91951607+sofstam@users.noreply.github.com>
Date: Fri, 22 Apr 2022 11:26:30 +0200
Subject: [PATCH 08/10] Kaiju2table module (#1545)

* Add kaiju2table module
---
 modules/kaiju/kaiju2table/main.nf             | 40 +++++++++++++++
 modules/kaiju/kaiju2table/meta.yml            | 50 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/kaiju/kaiju2table/main.nf       | 21 ++++++++
 .../modules/kaiju/kaiju2table/nextflow.config |  5 ++
 tests/modules/kaiju/kaiju2table/test.yml      |  9 ++++
 6 files changed, 129 insertions(+)
 create mode 100644 modules/kaiju/kaiju2table/main.nf
 create mode 100644 modules/kaiju/kaiju2table/meta.yml
 create mode 100644 tests/modules/kaiju/kaiju2table/main.nf
 create mode 100644 tests/modules/kaiju/kaiju2table/nextflow.config
 create mode 100644 tests/modules/kaiju/kaiju2table/test.yml

diff --git a/modules/kaiju/kaiju2table/main.nf b/modules/kaiju/kaiju2table/main.nf
new file mode 100644
index 00000000..00739d1e
--- /dev/null
+++ b/modules/kaiju/kaiju2table/main.nf
@@ -0,0 +1,57 @@
+// Summarise Kaiju classification results into a table for one taxonomic rank
+// by running `kaiju2table`.
+//
+// Inputs:
+//   meta       - sample metadata map; meta.id tags the task and is the default prefix
+//   results    - file(s) passed to kaiju2table as its positional input
+//   db         - path searched (symlinks followed) for "*nodes.dmp" and "*.fmi"
+//   taxon_rank - value given to the -r option
+// Outputs:
+//   summary    - the "*.txt" table (written as <prefix>.txt)
+//   versions   - versions.yml holding the Kaiju version string
+process KAIJU_KAIJU2TABLE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
+    // Both images point at the same Biocontainers build so that Singularity
+    // and Docker runs use identical software.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
+        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
+
+    input:
+    tuple val(meta), path(results)
+    path db
+    val taxon_rank
+
+    output:
+    tuple val(meta), path('*.txt'), emit: summary
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // NOTE(review): -n receives the "*.fmi" index here, whereas the Kaiju README
+    // documents -n as the names.dmp file. Left unchanged because the test db
+    // contents are not visible from this patch and the module test pins an
+    // md5sum of the current output - confirm and switch to "*names.dmp".
+    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
+    kaiju2table   $args \\
+        -t \$dbnodes \\
+        -n \$dbname \\
+        -r ${taxon_rank} \\
+        -o ${prefix}.txt \\
+        ${results}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/kaiju/kaiju2table/meta.yml b/modules/kaiju/kaiju2table/meta.yml
new file mode 100644
index 00000000..bc3e85d7
--- /dev/null
+++ b/modules/kaiju/kaiju2table/meta.yml
@@ -0,0 +1,55 @@
+# Metadata for the KAIJU_KAIJU2TABLE module; input/output entries mirror main.nf
+name: "kaiju_kaiju2table"
+description: Summarise Kaiju classification results into a table for a given taxonomic rank
+keywords:
+  - classify
+  - metagenomics
+tools:
+  - kaiju:
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - results:
+      type: file
+      description: File containing the kaiju classification results
+      pattern: "*.{txt}"
+  - db:
+      type: directory
+      description: |
+        Kaiju database location, searched for the "*nodes.dmp" and "*.fmi" files
+  - taxon_rank:
+      type: string
+      description: |
+        Taxonomic rank to display in report
+      pattern: "phylum|class|order|family|genus|species"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - summary:
+      type: file
+      description: |
+        Summary table for a given taxonomic rank
+      pattern: "*.{txt}"
+
+authors:
+  - "@sofstam"
+  - "@talnor"
+  - "@jfy133"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index c3bf04aa..a1a969e7 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1009,6 +1009,10 @@ kaiju/kaiju:
   - modules/kaiju/kaiju/**
   - tests/modules/kaiju/kaiju/**
 
+kaiju/kaiju2table:
+  - modules/kaiju/kaiju2table/**
+  - tests/modules/kaiju/kaiju2table/**
+
 kallisto/index:
   - modules/kallisto/index/**
   - tests/modules/kallisto/index/**
diff --git a/tests/modules/kaiju/kaiju2table/main.nf b/tests/modules/kaiju/kaiju2table/main.nf
new file mode 100644
index 00000000..b7169ba5
--- /dev/null
+++ b/tests/modules/kaiju/kaiju2table/main.nf
@@ -0,0 +1,29 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { UNTAR             } from '../../../../modules/untar/main.nf'
+include { KAIJU_KAIJU       } from '../../../../modules/kaiju/kaiju/main.nf'
+include { KAIJU_KAIJU2TABLE } from '../../../../modules/kaiju/kaiju2table/main.nf'
+
+// Chains UNTAR -> KAIJU_KAIJU -> KAIJU_KAIJU2TABLE on single-end sarscov2 reads.
+workflow test_kaiju_kaiju_single_end {
+
+    // Single-end reads paired with their meta map
+    reads_fastq = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+    reads_tuple = [ [ id:'test', single_end:true ], reads_fastq ]
+
+    // Database tarball, with an empty list in the meta position for UNTAR
+    db_archive = file(params.test_data['sarscov2']['genome']['kaiju_tar_gz'], checkIfExists: true)
+    db_tuple   = [ [], db_archive ]
+
+    taxon_rank = "species"
+
+    // Unpack the database and keep only the second element of each emitted tuple
+    UNTAR ( db_tuple )
+    ch_db_dir = UNTAR.out.untar.map { it[1] }
+
+    // Classify the reads, then summarise the results at the chosen rank
+    KAIJU_KAIJU ( reads_tuple, ch_db_dir )
+    KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_db_dir, taxon_rank )
+}
diff --git a/tests/modules/kaiju/kaiju2table/nextflow.config b/tests/modules/kaiju/kaiju2table/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/kaiju/kaiju2table/nextflow.config
@@ -0,0 +1,7 @@
+// Test configuration for the kaiju/kaiju2table module test
+process {
+
+    // Publish into <outdir>/<tool>: the unqualified process name up to its first '_', lower-cased
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/kaiju/kaiju2table/test.yml b/tests/modules/kaiju/kaiju2table/test.yml
new file mode 100644
index 00000000..47d99c89
--- /dev/null
+++ b/tests/modules/kaiju/kaiju2table/test.yml
@@ -0,0 +1,9 @@
+- name: kaiju kaiju2table test_kaiju_kaiju_single_end  # runs the UNTAR -> kaiju -> kaiju2table chain on single-end reads
+  command: nextflow run tests/modules/kaiju/kaiju2table -entry test_kaiju_kaiju_single_end -c tests/config/nextflow.config
+  tags:  # kaiju/kaiju2table matches the key registered in tests/config/pytest_modules.yml
+    - kaiju
+    - kaiju/kaiju2table
+  files:  # paths the run is expected to produce
+    - path: output/kaiju/test.txt
+      md5sum: 0d9f8fd36fcf2888296ae12632c5f0a8  # pins the exact table content; regenerate if the module output changes
+    - path: output/kaiju/versions.yml  # no md5sum given for this file

From b1749445d76d12d9961e687e811af1337f0eff0f Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 22 Apr 2022 12:50:17 +0200
Subject: [PATCH 09/10] fix output glob (#1551)

---
 modules/elprep/split/main.nf        | 11 ++++++-----
 tests/modules/elprep/split/test.yml |  6 +++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/modules/elprep/split/main.nf b/modules/elprep/split/main.nf
index 8af558d4..639944ef 100644
--- a/modules/elprep/split/main.nf
+++ b/modules/elprep/split/main.nf
@@ -11,16 +11,16 @@ process ELPREP_SPLIT {
     tuple val(meta), path(bam)
 
     output:
-    tuple val(meta), path("**.{bam,sam}"), emit: bam
+    tuple val(meta), path("output/**.{bam,sam}"), emit: bam
     path "versions.yml"           , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    meta.single_end ? args += " --single-end": ""
+    def args        = task.ext.args ?: ''
+    def prefix      = task.ext.prefix ?: "${meta.id}"
+    def single_end  = meta.single_end ? " --single-end": ""
 
     """
     # create directory and move all input so elprep can find and merge them before splitting
@@ -31,8 +31,9 @@ process ELPREP_SPLIT {
 
     elprep split \\
         input \\
-        . \\
+        output/ \\
         $args \\
+        $single_end \\
         --nr-of-threads $task.cpus \\
         --output-prefix $prefix
 
diff --git a/tests/modules/elprep/split/test.yml b/tests/modules/elprep/split/test.yml
index 7ba139b1..2de3f99b 100644
--- a/tests/modules/elprep/split/test.yml
+++ b/tests/modules/elprep/split/test.yml
@@ -4,7 +4,7 @@
     - elprep
     - elprep/split
   files:
-    - path: output/elprep/splits/test-group00001.bam
-    - path: output/elprep/splits/test-unmapped.bam
-    - path: output/elprep/test-spread.bam
+    - path: output/elprep/output/splits/test-group00001.bam
+    - path: output/elprep/output/splits/test-unmapped.bam
+    - path: output/elprep/output/test-spread.bam
     - path: output/elprep/versions.yml

From c17d1a7a7b473e103ddd3f28bd91b79733aa7cf2 Mon Sep 17 00:00:00 2001
From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
Date: Fri, 22 Apr 2022 13:04:41 +0200
Subject: [PATCH 10/10] fix output glob (#1552)

---
 modules/elprep/filter/main.nf        | 4 ++--
 tests/modules/elprep/filter/test.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/elprep/filter/main.nf b/modules/elprep/filter/main.nf
index 02c93186..5ca288bc 100644
--- a/modules/elprep/filter/main.nf
+++ b/modules/elprep/filter/main.nf
@@ -23,7 +23,7 @@ process ELPREP_FILTER {
 
 
     output:
-    tuple val(meta), path("**.{bam,sam}")           ,emit: bam
+    tuple val(meta), path("output/**.{bam,sam}")    ,emit: bam
     tuple val(meta), path("*.metrics.txt")          ,optional: true, emit: metrics
     tuple val(meta), path("*.recall")               ,optional: true, emit: recall
     tuple val(meta), path("*.vcf.gz")               ,optional: true, emit: gvcf
@@ -65,7 +65,7 @@ process ELPREP_FILTER {
     def assembly_regions_cmd = get_assembly_regions ? " --assembly-regions ${prefix}.assembly_regions.igv": ""
 
     """
-    elprep filter ${bam} ${prefix}.${suffix} \\
+    elprep filter ${bam} output/${prefix}.${suffix} \\
         ${reference_sequences_cmd} \\
         ${filter_regions_cmd} \\
         ${markdup_cmd} \\
diff --git a/tests/modules/elprep/filter/test.yml b/tests/modules/elprep/filter/test.yml
index 5242045b..922d7a9b 100644
--- a/tests/modules/elprep/filter/test.yml
+++ b/tests/modules/elprep/filter/test.yml
@@ -6,7 +6,7 @@
   files:
     - path: output/elprep/test.activity_profile.igv
     - path: output/elprep/test.assembly_regions.igv
-    - path: output/elprep/test.bam
+    - path: output/elprep/output/test.bam
     - path: output/elprep/test.g.vcf.gz
     - path: output/elprep/test.metrics.txt
     - path: output/elprep/test.recall