From 4b68e957702e2b3e68ed1cdfe91049b580498ad7 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Mon, 12 Dec 2022 08:49:13 +0100
Subject: [PATCH 01/11] Revert PEP validation back to python samplesheet check

---
 README.md                              |   2 -
 assets/samplesheet_schema.yaml         |  55 ------
 bin/check_samplesheet.py               | 236 +++++++++++++++++++++++++
 conf/modules.config                    |   8 -
 conf/test.config                       |   6 -
 conf/test_krakenuniq.config            |   6 -
 conf/test_pep.config                   |  45 -----
 docs/usage.md                          |   7 -
 lib/WorkflowMain.groovy                |   2 +-
 modules.json                           |  10 --
 modules/local/samplesheet_check.nf     |  27 +++
 modules/nf-core/eido/convert/main.nf   |  38 ----
 modules/nf-core/eido/convert/meta.yml  |  39 ----
 modules/nf-core/eido/validate/main.nf  |  33 ----
 modules/nf-core/eido/validate/meta.yml |  41 -----
 nextflow.config                        |   1 -
 subworkflows/local/input_check.nf      |  42 +----
 workflows/taxprofiler.nf               |   5 +-
 18 files changed, 275 insertions(+), 328 deletions(-)
 delete mode 100644 assets/samplesheet_schema.yaml
 create mode 100755 bin/check_samplesheet.py
 delete mode 100644 conf/test_pep.config
 create mode 100644 modules/local/samplesheet_check.nf
 delete mode 100644 modules/nf-core/eido/convert/main.nf
 delete mode 100644 modules/nf-core/eido/convert/meta.yml
 delete mode 100644 modules/nf-core/eido/validate/main.nf
 delete mode 100644 modules/nf-core/eido/validate/meta.yml

diff --git a/README.md b/README.md
index 3f45cba..ba4022f 100644
--- a/README.md
+++ b/README.md
@@ -77,8 +77,6 @@ On release, automated continuous integration tests run the pipeline on a full-si
 nextflow run nf-core/taxprofiler --input samplesheet.csv --databases database.csv --outdir --run_ --run_ -profile
 ```

-Note pipeline supports both CSV and PEP input sample sheets. Find out more [here](http://pep.databio.org/en/2.1.0/specification/).
-
 ## Documentation

 The nf-core/taxprofiler pipeline comes with documentation about the pipeline [usage](https://nf-co.re/taxprofiler/usage), [parameters](https://nf-co.re/taxprofiler/parameters) and [output](https://nf-co.re/taxprofiler/output).

diff --git a/assets/samplesheet_schema.yaml b/assets/samplesheet_schema.yaml
deleted file mode 100644
index 88ff451..0000000
--- a/assets/samplesheet_schema.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-description: A schema for validation of samplesheet.csv for taxprofiler pipeline.
-imports:
-  - https://schema.databio.org/pep/2.1.0.yaml
-properties:
-  samples:
-    type: array
-    items:
-      type: object
-      properties:
-        sample:
-          type: string
-          description: "Sample identifier."
-          pattern: "^\\S*$"
-        run_accession:
-          type: string
-          description: "Run accession number."
-        instrument_platform:
-          type: string
-          description: "Name of the platform that sequenced the samples."
-          enum:
-            [
-              "ABI_SOLID",
-              "BGISEQ",
-              "CAPILLARY",
-              "COMPLETE_GENOMICS",
-              "DNBSEQ",
-              "HELICOS",
-              "ILLUMINA",
-              "ION_TORRENT",
-              "LS454",
-              "OXFORD_NANOPORE",
-              "PACBIO_SMRT",
-            ]
-        fastq1:
-          type: ["string", "null"]
-          description: "Optional FASTQ file for read 1 of paired-end sequenced libraries."
-          pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
-        fastq2:
-          type: ["string", "null"]
-          description: "Optional FASTQ file for read 2 of paired-end sequenced libraries."
-          pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
-        fasta:
-          type: ["string", "null"]
-          description: "Optional FASTA file."
-          pattern: "^[\\S]+.(fa\\.gz|fasta\\.gz)$"
-      required:
-        - sample
-        - run_accession
-        - instrument_platform
-      files:
-        - fastq1
-        - fastq2
-        - fasta
-required:
-  - samples

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
new file mode 100755
index 0000000..ca54ed9
--- /dev/null
+++ b/bin/check_samplesheet.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+
+from distutils import extension
+import os
+import sys
+import errno
+import argparse
+
+
+def parse_args(args=None):
+    Description = "Reformat nf-core/taxprofiler samplesheet file and check its contents."
+
+    Epilog = "Example usage: python check_samplesheet.py "
+
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
+    parser.add_argument("FILE_IN", help="Input samplesheet file.")
+    parser.add_argument("FILE_OUT", help="Output file.")
+    return parser.parse_args(args)
+
+
+def make_dir(path):
+    if len(path) > 0:
+        try:
+            os.makedirs(path)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise exception
+
+
+def print_error(error, context="Line", context_str=""):
+    error_str = "ERROR: Please check samplesheet -> {}".format(error)
+    if context != "" and context_str != "":
+        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
+            error, context.strip(), context_str.strip()
+        )
+    print(error_str)
+    sys.exit(1)
+
+
+def check_samplesheet(file_in, file_out):
+    """
+    This function checks that the samplesheet follows the following structure:
+
+    sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
+    2611,ERR5766174,ILLUMINA,,,ERX5474930_ERR5766174_1.fa.gz
+    2612,ERR5766176,ILLUMINA,ERX5474932_ERR5766176_1.fastq.gz,ERX5474932_ERR5766176_2.fastq.gz,
+    2612,ERR5766174,ILLUMINA,ERX5474936_ERR5766180_1.fastq.gz,,
+    2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
+    """
+
+    FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
+    FA_EXTENSIONS = (
+        ".fa",
+        ".fa.gz",
+        ".fasta",
+        ".fasta.gz",
+        ".fna",
+        ".fna.gz",
+        ".fas",
+        ".fas.gz",
+    )
+    INSTRUMENT_PLATFORMS = [
+        "ABI_SOLID",
+        "BGISEQ",
+        "CAPILLARY",
+        "COMPLETE_GENOMICS",
+        "DNBSEQ",
+        "HELICOS",
+        "ILLUMINA",
+        "ION_TORRENT",
+        "LS454",
+        "OXFORD_NANOPORE",
+        "PACBIO_SMRT",
+    ]
+
+    sample_mapping_dict = {}
+    with open(file_in, "r") as fin:
+
+        ## Check header
+        MIN_COLS = 4
+        HEADER = [
+            "sample",
+            "run_accession",
+            "instrument_platform",
+            "fastq_1",
+            "fastq_2",
+            "fasta",
+        ]
+        header = [x.strip('"') for x in fin.readline().strip().split(",")]
+
+        ## Check for missing mandatory columns
+        missing_columns = list(set(HEADER) - set(header))
+        if len(missing_columns) > 0:
+            print(
+                "ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
+                    ",".join(missing_columns)
+                )
+            )
+            sys.exit(1)
+
+        ## Find locations of mandatory columns
+        header_locs = {}
+        for i in HEADER:
+            header_locs[i] = header.index(i)
+
+        ## Check sample entries
+        for line in fin:
+
+            ## Pull out only relevant columns for downstream checking
+            line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
+            lspl = [line_parsed[i] for i in header_locs.values()]
+
+            # Check valid number of columns per row
+            if len(lspl) < len(HEADER):
+                print_error(
+                    "Invalid number of columns (minimum = {})!".format(len(HEADER)),
+                    "Line",
+                    line,
+                )
+            num_cols = len([x for x in lspl if x])
+            if num_cols < MIN_COLS:
+                print_error(
+                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
+                    "Line",
+                    line,
+                )
+
+            ## Check sample name entries
+
+            (
+                sample,
+                run_accession,
+                instrument_platform,
+                fastq_1,
+                fastq_2,
+                fasta,
+            ) = lspl[: len(HEADER)]
+            sample = sample.replace(" ", "_")
+            if not sample:
+                print_error("Sample entry has not been specified!", "Line", line)
+
+            ## Check FastQ file extension
+            for fastq in [fastq_1, fastq_2]:
+                if fastq:
+                    if fastq.find(" ") != -1:
+                        print_error("FastQ file contains spaces!", "Line", line)
+                    if not fastq.endswith(FQ_EXTENSIONS):
+                        print_error(
+                            f"FastQ file does not have extension {' or '.join(list(FQ_EXTENSIONS))} !",
+                            "Line",
+                            line,
+                        )
+            if fasta:
+                if fasta.find(" ") != -1:
+                    print_error("FastA file contains spaces!", "Line", line)
+                if not fasta.endswith(FA_EXTENSIONS):
+                    print_error(
+                        f"FastA file does not have extension {' or '.join(list(FA_EXTENSIONS))}!",
+                        "Line",
+                        line,
+                    )
+            sample_info = []
+
+            # Check run_accession
+            if not run_accession:
+                print_error("Run accession has not been specified!", "Line", line)
+            else:
+                sample_info.append(run_accession)
+
+            # Check instrument_platform
+            if not instrument_platform:
+                print_error("Instrument platform has not been specified!", "Line", line)
+            else:
+                if instrument_platform not in INSTRUMENT_PLATFORMS:
+                    print_error(
+                        f"Instrument platform {instrument_platform} is not supported!",
+                        f"List of supported platforms {', '.join(INSTRUMENT_PLATFORMS)}",
+                        "Line",
+                        line,
+                    )
+                sample_info.append(instrument_platform)
+
+            ## Auto-detect paired-end/single-end
+            if sample and fastq_1 and fastq_2:  ## Paired-end short reads
+                sample_info.extend(["0", fastq_1, fastq_2, fasta])
+            elif sample and fastq_1 and not fastq_2:  ## Single-end short/long fastq reads
+                sample_info.extend(["1", fastq_1, fastq_2, fasta])
+            elif sample and fasta and not fastq_1 and not fastq_2:  ## Single-end long reads
+                sample_info.extend(["1", fastq_1, fastq_2, fasta])
+            elif fasta and (fastq_1 or fastq_2):
+                print_error(
+                    "FastQ and FastA files cannot be specified together in the same library!",
+                    "Line",
+                    line,
+                )
+            else:
+                print_error("Invalid combination of columns provided!", "Line", line)
+
+            ## Create sample mapping dictionary = { sample: [ run_accession, instrument_platform, single_end, fastq_1, fastq_2 , fasta ] }
+            if sample not in sample_mapping_dict:
+                sample_mapping_dict[sample] = [sample_info]
+            else:
+                if sample_info in sample_mapping_dict[sample]:
+                    print_error("Samplesheet contains duplicate rows!", "Line", line)
+                else:
+                    sample_mapping_dict[sample].append(sample_info)
+
+    ## Write validated samplesheet with appropriate columns
+    HEADER_OUT = [
+        "sample",
+        "run_accession",
+        "instrument_platform",
+        "single_end",
+        "fastq_1",
+        "fastq_2",
+        "fasta",
+    ]
+    if len(sample_mapping_dict) > 0:
+        out_dir = os.path.dirname(file_out)
+        make_dir(out_dir)
+        with open(file_out, "w") as fout:
+            fout.write(",".join(HEADER_OUT) + "\n")
+            for sample in sorted(sample_mapping_dict.keys()):
+                for idx, val in enumerate(sample_mapping_dict[sample]):
+                    fout.write(f"{sample},{','.join(val)}\n")
+    else:
+        print_error("No entries to process!", "Samplesheet: {}".format(file_in))
+
+
+def main(args=None):
+    args = parse_args(args)
+    check_samplesheet(args.FILE_IN, args.FILE_OUT)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
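The core of the reverted checker is the paired-/single-end auto-detection near the end of the row loop: `fastq_1` plus `fastq_2` marks a library as paired-end (`single_end = "0"`), a lone `fastq_1` or a lone `fasta` as single-end (`"1"`), and a FASTA combined with a single FASTQ is rejected. Note the first branch wins, so a row with both FASTQs keeps any `fasta` value rather than raising. A minimal standalone sketch of that decision table, preserving the script's branch order (the function name `detect_single_end` is illustrative, not part of the script):

```python
def detect_single_end(fastq_1: str, fastq_2: str, fasta: str) -> str:
    """Return the single_end flag ("0" or "1") using the same branch
    order as check_samplesheet.py; raise on invalid combinations."""
    if fastq_1 and fastq_2:
        return "0"  # paired-end short reads
    if fastq_1 and not fastq_2:
        return "1"  # single-end short/long fastq reads
    if fasta and not fastq_1 and not fastq_2:
        return "1"  # FASTA-only libraries are treated as single-end
    if fasta and (fastq_1 or fastq_2):
        raise ValueError("FastQ and FastA files cannot be specified together")
    raise ValueError("Invalid combination of columns")


assert detect_single_end("r_1.fastq.gz", "r_2.fastq.gz", "") == "0"
assert detect_single_end("r_1.fastq.gz", "", "") == "1"
assert detect_single_end("", "", "contigs.fa.gz") == "1"
```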
diff --git a/conf/modules.config b/conf/modules.config
index dd85c0c..d84102c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -495,12 +495,4 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
-
-    withName: 'EIDO_VALIDATE' {
-        ext.args = '--st-index sample'
-    }
-
-    withName: 'EIDO_CONVERT' {
-        ext.args = '--st-index sample'
-    }
 }

diff --git a/conf/test.config b/conf/test.config
index db9f81d..275e5a0 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -60,10 +60,4 @@ process {
     withName: MEGAN_RMA2INFO_KRONA {
         maxForks = 1
     }
-    withName: 'EIDO_VALIDATE' {
-        ext.args = '--st-index sample'
-    }
-    withName: 'EIDO_CONVERT' {
-        ext.args = '--st-index sample'
-    }
 }

diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config
index 67b559e..7ba18fa 100644
--- a/conf/test_krakenuniq.config
+++ b/conf/test_krakenuniq.config
@@ -63,10 +63,4 @@ process {
     withName: MEGAN_RMA2INFO_KRONA {
         maxForks = 1
     }
-    withName: 'EIDO_VALIDATE' {
-        ext.args = '--st-index sample'
-    }
-    withName: 'EIDO_CONVERT' {
-        ext.args = '--st-index sample'
-    }
 }

diff --git a/conf/test_pep.config b/conf/test_pep.config
deleted file mode 100644
index 6ce788d..0000000
--- a/conf/test_pep.config
+++ /dev/null
@@ -1,45 +0,0 @@
-params {
-    config_profile_name        = 'Test PEP profile'
-    config_profile_description = 'Minimal test dataset to check pipeline function with PEP file as an input.'
-
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
-
-    // Input data
-    input                              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/pep/test_pep_format_files/config.yaml'
-    databases                          = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    perform_shortread_qc               = true
-    perform_longread_qc                = true
-    perform_shortread_complexityfilter = true
-    perform_shortread_hostremoval      = true
-    perform_longread_hostremoval       = true
-    perform_runmerging                 = true
-    hostremoval_reference              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
-    run_kaiju                          = true
-    run_kraken2                        = true
-    run_bracken                        = true
-    run_malt                           = true
-    run_metaphlan3                     = true
-    run_centrifuge                     = true
-    run_diamond                        = true
-    run_motus                          = false
-    run_krona                          = true
-    krona_taxonomy_directory           = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
-    malt_save_reads                    = true
-    kraken2_save_reads                 = true
-    centrifuge_save_reads              = true
-    diamond_save_reads                 = true
-}
-
-
-process {
-    withName: MALT_RUN {
-        maxForks = 1
-        ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
-    }
-    withName: MEGAN_RMA2INFO {
-        maxForks = 1
-    }
-}

diff --git a/docs/usage.md b/docs/usage.md
index f748764..685c3e3 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -22,10 +22,6 @@ This samplesheet is then specified on the command line as follows:
 --input '[path to samplesheet file]' --databases '[path to database sheet file]'
 ```

-Note pipeline supports both CSV and PEP input sample sheets. Find out more [here](http://pep.databio.org/en/2.1.0/specification/).
-When using PEP as an input, the `samplesheet.csv` must be placed in the same folder
-as `config.yaml` file. A path to `samplesheet.csv` within the config must be absolute.
-
 ### Multiple runs of the same sample

 The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate different runs FASTQ files of the same sample before performing profiling, when `--perform_runmerging` is supplied. Below is an example for the same sample sequenced across 3 lanes:
@@ -312,9 +308,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
 - `test`
   - A profile with a complete configuration for automated testing
   - Includes links to test data so needs no other parameters
-- `test_pep`
-  - A profile with a complete configuration for running a pipeline with PEP as input
-  - Includes links to test data so needs no other parameters
 - `docker`
   - A generic configuration profile to be used with [Docker](https://docker.com/)
 - `singularity`

diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 93d2af3..f0107fb 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -81,7 +81,7 @@ class WorkflowMain {

         // Check input has been provided
         if (!params.input) {
-            log.error "Please provide an input samplesheet or PEP to the pipeline e.g. '--input samplesheet.csv'"
+            log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'"
             System.exit(1)
         }
     }

diff --git a/modules.json b/modules.json
index 377e902..286ab88 100644
--- a/modules.json
+++ b/modules.json
@@ -55,16 +55,6 @@
             "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
             "installed_by": ["modules"]
         },
-        "eido/convert": {
-            "branch": "master",
-            "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
-            "installed_by": ["modules"]
-        },
-        "eido/validate": {
-            "branch": "master",
-            "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
-            "installed_by": ["modules"]
-        },
         "falco": {
             "branch": "master",
             "git_sha": "fc959214036403ad83efe7a41d43d0606c445cda",

diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
new file mode 100644
index 0000000..dea4362
--- /dev/null
+++ b/modules/local/samplesheet_check.nf
@@ -0,0 +1,27 @@
+process SAMPLESHEET_CHECK {
+    tag "$samplesheet"
+
+    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
+        'quay.io/biocontainers/python:3.8.3' }"
+
+    input:
+    path samplesheet
+
+    output:
+    path '*.csv'       , emit: csv
+    path "versions.yml", emit: versions
+
+    script: // This script is bundled with the pipeline, in nf-core/taxprofiler/bin/
+    """
+    check_samplesheet.py \\
+        $samplesheet \\
+        samplesheet.valid.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+}

diff --git a/modules/nf-core/eido/convert/main.nf b/modules/nf-core/eido/convert/main.nf
deleted file mode 100644
index 7ec4b8c..0000000
--- a/modules/nf-core/eido/convert/main.nf
+++ /dev/null
@@ -1,38 +0,0 @@
-process EIDO_CONVERT {
-    tag "$samplesheet"
-    label 'process_single'
-
-    conda (params.enable_conda ? "conda-forge::eido=0.1.9" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://containers.biocontainers.pro/s3/SingImgsRepo/eido/0.1.9_cv1/eido_0.1.9_cv1.sif' :
-        'biocontainers/eido:0.1.9_cv1' }"
-
-    input:
-    path samplesheet
-    val format
-    path pep_input_base_dir
-
-    output:
-    path "versions.yml"        , emit: versions
-    path "${prefix}.${format}" , emit: samplesheet_converted
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    prefix   = task.ext.prefix ?: "samplesheet_converted"
-    """
-    eido \\
-        convert \\
-        -f $format \\
-        $samplesheet \\
-        $args \\
-        -p samples=${prefix}.${format}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        eido: \$(echo \$(eido --version 2>&1) | sed 's/^.*eido //;s/ .*//' ))
-    END_VERSIONS
-    """
-}

diff --git a/modules/nf-core/eido/convert/meta.yml b/modules/nf-core/eido/convert/meta.yml
deleted file mode 100644
index bd12e03..0000000
--- a/modules/nf-core/eido/convert/meta.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: "eido_convert"
-description: Convert any PEP project or Nextflow samplesheet to any format
-keywords:
-  - eido
-  - convert
-  - PEP
-  - format
-  - samplesheet
-tools:
-  - "eido":
-      description: "Convert any PEP project or Nextflow samplesheet to any format"
-      homepage: "http://eido.databio.org/en/latest/"
-      documentation: "http://eido.databio.org/en/latest/"
-      doi: "10.1093/gigascience/giab077"
-      licence: "BSD-2-Clause"
-
-input:
-  - samplesheet:
-      type: file
-      description: Nextflow samplesheet or PEP project
-      pattern: "*.{yaml,yml,csv}"
-  - format:
-      type: value
-      description: Extension of an output file
-  - pep_input_base_dir:
-      type: file
-      description: Optional path to the directory where files specified in a PEP config file are stored. Any paths specified in the config will need to be relative to this base directory.
-
-output:
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - samplesheet_converted:
-      type: file
-      description: PEP project or samplesheet converted to csv file
-
-authors:
-  - "@rafalstepien"

diff --git a/modules/nf-core/eido/validate/main.nf b/modules/nf-core/eido/validate/main.nf
deleted file mode 100644
index 798d3a0..0000000
--- a/modules/nf-core/eido/validate/main.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-process EIDO_VALIDATE {
-    tag "$samplesheet"
-    label 'process_single'
-
-    conda (params.enable_conda ? "conda-forge::eido=0.1.9" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://containers.biocontainers.pro/s3/SingImgsRepo/eido/0.1.9_cv2/eido_0.1.9_cv2.sif' :
-        'biocontainers/eido:0.1.9_cv2' }"
-
-    input:
-    path samplesheet
-    path schema
-    path pep_input_base_dir
-
-    output:
-    path "versions.yml" , emit: versions
-    path "*.log"        , emit: log
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "validation"
-    """
-    eido validate $args $samplesheet -s $schema -e > ${prefix}.log
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        eido: \$(echo \$(eido --version 2>&1) | sed 's/^.*eido //;s/ .*//' ))
-    END_VERSIONS
-    """
-}

diff --git a/modules/nf-core/eido/validate/meta.yml b/modules/nf-core/eido/validate/meta.yml
deleted file mode 100644
index eb7b295..0000000
--- a/modules/nf-core/eido/validate/meta.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-name: "eido_validate"
-description: Validate samplesheet or PEP config against a schema
-keywords:
-  - eido
-  - validate
-  - schema
-  - format
-  - pep
-tools:
-  - "validate":
-      description: "Validate samplesheet or PEP config against a schema."
-      homepage: "http://eido.databio.org/en/latest/"
-      documentation: "http://eido.databio.org/en/latest/"
-      doi: "10.1093/gigascience/giab077"
-      licence: "BSD-2-Clause"
-
-input:
-  - samplesheet:
-      type: file
-      description: Samplesheet or PEP file to be validated
-      pattern: "*.{yaml,yml,csv}"
-  - schema:
-      type: file
-      description: Schema that the samplesheet will be validated against
-      pattern: "*.{yaml,yml}"
-  - pep_input_base_dir:
-      type: file
-      description: Optional path to the directory where files specified in a PEP config file are stored. Any paths specified in the config will need to be relative to this base directory.
-
-output:
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - log:
-      type: file
-      description: File containing validation log.
-      pattern: "*.log"
-
-authors:
-  - "@rafalstepien"

diff --git a/nextflow.config b/nextflow.config
index 6d5a374..088bcec 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -252,7 +252,6 @@ profiles {
     test_nothing    { includeConfig 'conf/test_nothing.config'    }
    test_motus      { includeConfig 'conf/test_motus.config'      }
    test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' }
-    test_pep        { includeConfig 'conf/test_pep.config'        }
 }

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 46baff2..eb21b9d 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -2,41 +2,31 @@
 //
 // Check input samplesheet and get read channels
 //

-include { EIDO_VALIDATE } from '../../modules/nf-core/eido/validate/main'
-include { EIDO_CONVERT  } from '../../modules/nf-core/eido/convert/main'
+include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'

 workflow INPUT_CHECK {
     take:
-    samplesheet_or_pep_config // file: /path/to/samplesheet.csv or /path/to/pep/config.yaml
-    pep_input_base_dir
+    samplesheet // file: /path/to/samplesheet.csv

     main:
-    ch_versions = Channel.empty()
-
-    EIDO_VALIDATE ( samplesheet_or_pep_config, file("$projectDir/assets/samplesheet_schema.yaml"), pep_input_base_dir )
-    ch_versions = ch_versions.mix(EIDO_VALIDATE.out.versions)
-
-    EIDO_CONVERT ( samplesheet_or_pep_config, "csv", pep_input_base_dir )
-    ch_versions = ch_versions.mix(EIDO_CONVERT.out.versions)
-
-    ch_parsed_samplesheet = EIDO_CONVERT.out.samplesheet_converted
+    parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
+        .csv
         .splitCsv ( header:true, sep:',' )
-        .map { check_missing_and_singleend_autodetect(it) }
         .branch {
             fasta: it['fasta'] != ''
             nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
             fastq: true
         }

-    ch_parsed_samplesheet.fastq
+    parsed_samplesheet.fastq
         .map { create_fastq_channel(it) }
         .set { fastq }

-    ch_parsed_samplesheet.nanopore
+    parsed_samplesheet.nanopore
         .map { create_fastq_channel(it) }
         .set { nanopore }

-    ch_parsed_samplesheet.fasta
+    parsed_samplesheet.fasta
         .map { create_fasta_channel(it) }
         .set { fasta }

     emit:
     fastq    = fastq    ?: []    // channel: [ val(meta), [ reads ] ]
     nanopore = nanopore ?: []    // channel: [ val(meta), [ reads ] ]
     fasta    = fasta    ?: []    // channel: [ val(meta), fasta ]
-    versions = ch_versions       // channel: [ versions.yml ]
-}
-
-// Function to validate input sheet and auto-detect R1/R2
-def check_missing_and_singleend_autodetect(LinkedHashMap row) {
-
-    // Checks not supported by EIDO(?)
-    if ( ( row['fastq_1'] != "" || row['fastq_2'] != "" ) && row['fasta'] != "" ) { exit 1, "[nf-core/taxprofiler] ERROR: FastQ and FastA files cannot be specified together in the same library. Check input samplesheet! Check sample: ${row['sample']}" }
-    if ( row['fastq_1'] == "" && row['fastq_2'] != "" ) { exit 1, "[nf-core/taxprofiler] ERROR: Input samplesheet has a missing fastq_1 when fastq_2 is specified. Check sample: ${row['sample']}" }
-
-    single_end = row['fastq_2'] == "" ? true : false
-    row['single_end'] = single_end
-
-    return row
+    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
 }

 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
@@ -87,12 +64,11 @@ def create_fastq_channel(LinkedHashMap row) {
         if (!file(row.fastq_2).exists()) {
             exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
         }
-        fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
+        fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
         }
     }
     return fastq_meta
-
 }
 // Function to get list of [ meta, fasta ]
 def create_fasta_channel(LinkedHashMap row) {
     def meta = [:]
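The `branch` operator above routes each parsed row by the first matching predicate: rows with a `fasta` entry, then `OXFORD_NANOPORE` rows, then everything else as generic FASTQ. A rough Python analogue of that ordered routing, for illustration only (the pipeline itself does this with Nextflow channels):

```python
def route(row: dict) -> str:
    """First matching predicate wins, as in Nextflow's branch operator."""
    if row["fasta"] != "":
        return "fasta"
    if row["instrument_platform"] == "OXFORD_NANOPORE":
        return "nanopore"
    return "fastq"


rows = [
    {"sample": "2611", "instrument_platform": "ILLUMINA", "fasta": "x.fa.gz"},
    {"sample": "2612", "instrument_platform": "OXFORD_NANOPORE", "fasta": ""},
    {"sample": "2613", "instrument_platform": "ILLUMINA", "fasta": ""},
]
print([route(r) for r in rows])  # ['fasta', 'nanopore', 'fastq']
```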
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index c9f002c..73aa0ef 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -20,9 +20,8 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if ( params.input ) {
     ch_input = file(params.input, checkIfExists: true)
-    pep_input_base_dir = file(params.input).extension.matches("yaml|yml") ? file(file(params.input).getParent(), checkIfExists: true) : []
 } else {
-    exit 1, "Input samplesheet, or PEP config and base directory not specified"
+    exit 1, "Input samplesheet not specified"
 }

 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
@@ -115,7 +114,7 @@ workflow TAXPROFILER {
         SUBWORKFLOW: Read in samplesheet, validate and stage input files
     */
     INPUT_CHECK (
-        ch_input, pep_input_base_dir
+        ch_input
     )
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)

From ab591e01fff4f4eb7aa770e4c2b2b0e0093ce1ce Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Mon, 19 Dec 2022 12:10:58 +0000
Subject: [PATCH 02/11] Template update for nf-core/tools version 2.7.2

---
 .github/workflows/fix-linting.yml             |  4 +-
 .github/workflows/linting_comment.yml         |  2 +-
 lib/WorkflowMain.groovy                       |  2 +-
 modules.json                                  |  6 +-
 modules/local/samplesheet_check.nf            |  2 +-
 .../custom/dumpsoftwareversions/main.nf       |  2 +-
 .../templates/dumpsoftwareversions.py         | 87 +++++++++++--------
 modules/nf-core/fastqc/main.nf                | 38 ++++----
 modules/nf-core/multiqc/main.nf               |  2 +-
 nextflow.config                               |  3 -
 nextflow_schema.json                          |  6 --
 workflows/taxprofiler.nf                      |  2 +-
 12 files changed, 75 insertions(+), 81 deletions(-)
 mode change 100644 => 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py

diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index e1332db..96ecd90 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -34,9 +34,9 @@ jobs:
         id: prettier_status
         run: |
           if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "name=result::pass" >> $GITHUB_OUTPUT
+            echo "result=pass" >> $GITHUB_OUTPUT
           else
-            echo "name=result::fail" >> $GITHUB_OUTPUT
+            echo "result=fail" >> $GITHUB_OUTPUT
           fi

       - name: Run 'prettier --write'

diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 3963518..0bbcd30 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -18,7 +18,7 @@ jobs:

       - name: Get PR number
         id: pr_number
-        run: echo "name=pr_number::$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
+        run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT

       - name: Post PR comment
         uses: marocchino/sticky-pull-request-comment@v2

diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index c7e396a..5cb2a2f 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -72,7 +72,7 @@ class WorkflowMain {
         NfcoreTemplate.checkConfigProvided(workflow, log)

         // Check that conda channels are set-up correctly
-        if (params.enable_conda) {
+        if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
             Utils.checkCondaChannels(log)
         }

diff --git a/modules.json b/modules.json
index e301ff7..f0ab3a4 100644
--- a/modules.json
+++ b/modules.json
@@ -7,17 +7,17 @@
         "nf-core": {
             "custom/dumpsoftwareversions": {
                 "branch": "master",
-                "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                 "installed_by": ["modules"]
             },
             "fastqc": {
                 "branch": "master",
-                "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                 "installed_by": ["modules"]
             },
             "multiqc": {
                 "branch": "master",
-                "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                 "installed_by": ["modules"]
             }
         }

diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
index db9c508..ed686c8 100644
--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
@@ -2,7 +2,7 @@ process SAMPLESHEET_CHECK {
     tag "$samplesheet"
     label 'process_single'

-    conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+    conda "conda-forge::python=3.8.3"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.8.3' :
         'quay.io/biocontainers/python:3.8.3' }"

diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index cebb6e0..3df2176 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
     label 'process_single'

     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
+    conda "bioconda::multiqc=1.13"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
        'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"

diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
old mode 100644
new mode 100755
index 787bdb7..e55b8d4
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -1,5 +1,9 @@
 #!/usr/bin/env python

+
+"""Provide functions to merge multiple versions.yml files."""
+
+
 import platform
 from textwrap import dedent

@@ -7,6 +11,7 @@ import yaml


 def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
     html = [
         dedent(
             """\\
@@ -45,47 +50,53 @@ def _make_versions_html(versions):
     return "\\n".join(html)


-versions_this_module = {}
-versions_this_module["${task.process}"] = {
-    "python": platform.python_version(),
-    "yaml": yaml.__version__,
-}
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }

-with open("$versions") as f:
-    versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module

-# aggregate versions by the module name (derived from fully-qualified process name)
-versions_by_module = {}
-for process, process_versions in versions_by_process.items():
-    module = process.split(":")[-1]
-    try:
-        if versions_by_module[module] != process_versions:
-            raise AssertionError(
-                "We assume that software versions are the same between all modules. "
-                "If you see this error-message it means you discovered an edge-case "
-                "and should open an issue in nf-core/tools. "
-            )
-    except KeyError:
-        versions_by_module[module] = process_versions
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions

-versions_by_module["Workflow"] = {
-    "Nextflow": "$workflow.nextflow.version",
-    "$workflow.manifest.name": "$workflow.manifest.version",
-}
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }

-versions_mqc = {
-    "id": "software_versions",
-    "section_name": "${workflow.manifest.name} Software Versions",
-    "section_href": "https://github.com/${workflow.manifest.name}",
-    "plot_type": "html",
-    "description": "are collected at run time from the software output.",
-    "data": _make_versions_html(versions_by_module),
-}
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }

-with open("software_versions.yml", "w") as f:
-    yaml.dump(versions_by_module, f, default_flow_style=False)
-with open("software_versions_mqc.yml", "w") as f:
-    yaml.dump(versions_mqc, f, default_flow_style=False)
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)

-with open("versions.yml", "w") as f:
-    yaml.dump(versions_this_module, f, default_flow_style=False)
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
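Wrapping the template in `main()` does not change the merge logic: per-process version dicts are keyed by the final component of the fully qualified process name, and a version conflict for the same module raises an `AssertionError`. A condensed, self-contained sketch of that aggregation step (the inline test data stands in for the collated `versions.yml`; note the `|` dict merge used above requires Python 3.9+):

```python
versions_by_process = {
    "NFCORE_TAXPROFILER:TAXPROFILER:FASTQC": {"fastqc": "0.11.9"},
    "NFCORE_TAXPROFILER:TAXPROFILER:MULTIQC": {"multiqc": "1.13"},
}

versions_by_module = {}
for process, process_versions in versions_by_process.items():
    module = process.split(":")[-1]  # e.g. "FASTQC"
    # setdefault stores the first record; any later, different record is a conflict
    if versions_by_module.setdefault(module, process_versions) != process_versions:
        raise AssertionError(f"Conflicting versions recorded for module {module}")

print(versions_by_module)
# {'FASTQC': {'fastqc': '0.11.9'}, 'MULTIQC': {'multiqc': '1.13'}}
```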
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 0573036..9ae5838 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,7 +2,7 @@ process FASTQC {
     tag "$meta.id"
     label 'process_medium'

-    conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null)
+    conda "bioconda::fastqc=0.11.9"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
         'quay.io/biocontainers/fastqc:0.11.9--0' }"
@@ -20,30 +20,22 @@ process FASTQC {

     script:
     def args = task.ext.args ?: ''
-    // Add soft-links to original FastQs for consistent naming in pipeline
     def prefix = task.ext.prefix ?: "${meta.id}"
-    if (meta.single_end) {
-        """
-        [ ! -f  ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
-        fastqc $args --threads $task.cpus ${prefix}.fastq.gz
+    // Make list of old name and new name pairs to use for renaming in the bash while loop
+    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+    def rename_to = old_new_pairs*.join(' ').join(' ')
+    def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+    """
+    printf "%s %s\\n" $rename_to | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+    fastqc $args --threads $task.cpus $renamed_files

-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
-        END_VERSIONS
-        """
-    } else {
-        """
-        [ ! -f  ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
-        [ ! -f  ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
-        fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
-        END_VERSIONS
-        """
-    }
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+    END_VERSIONS
+    """

     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index a8159a5..68f66be 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,7 +1,7 @@
 process MULTIQC {
     label 'process_single'

-    conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
+    conda "bioconda::multiqc=1.13"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
         'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"

diff --git a/nextflow.config b/nextflow.config
index 46d4fa4..a4dc679 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -39,7 +39,6 @@ params {
     validate_params            = true
     show_hidden_params         = false
     schema_ignore_params       = 'genomes'
-    enable_conda               = false

     // Config options

@@ -81,7 +80,6 @@ try {
 profiles {
     debug { process.beforeScript = 'echo $HOSTNAME' }
     conda {
-        params.enable_conda    = true
         conda.enabled          = true
         docker.enabled         = false
         singularity.enabled    = false
         podman.enabled         = false
         shifter.enabled        = false
         charliecloud.enabled   = false
     }
     mamba {
-        params.enable_conda    = true
         conda.enabled          = true
         conda.useMamba         = true
         docker.enabled         = false

diff --git a/nextflow_schema.json b/nextflow_schema.json
index d0b754a..4b4b61e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -263,12 +263,6 @@
             "description": "Show all params when using `--help`",
             "hidden": true,
             "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
-        },
-        "enable_conda": {
-            "type": "boolean",
-            "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.",
-            "hidden": true,
-            "fa_icon": "fas fa-bacon"
         }
     }
 }

diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 76f69f5..ce8417e 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -82,7 +82,7 @@ workflow TAXPROFILER {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())

     CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml')
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )

     //

From 770924587495e74a3bef3a57f3439d64f28595c4 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Tue, 20 Dec 2022 13:54:05 +0100
Subject: [PATCH 03/11] Update subworkflows/local/input_check.nf

Co-authored-by: Moritz E. Beber
---
 subworkflows/local/input_check.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index eb21b9d..7fd0072 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -34,7 +34,7 @@ workflow INPUT_CHECK {
     fastq    = fastq    ?: []    // channel: [ val(meta), [ reads ] ]
     nanopore = nanopore ?: []    // channel: [ val(meta), [ reads ] ]
     fasta    = fasta    ?: []    // channel: [ val(meta), fasta ]
-    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
+    versions = SAMPLESHEET_CHECK.out.versions.first() // channel: [ versions.yml ]
 }

 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]

From 11db981a887b27a271bede8cda0406a47a84743e Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Tue, 20 Dec 2022 13:55:15 +0100
Subject: [PATCH 04/11] minor fixes

---
 bin/check_samplesheet.py          |  4 ----
 docs/usage.md                     |  2 +-
 subworkflows/local/input_check.nf | 11 ++++-------
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index ca54ed9..a45cb93 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -50,13 +50,9 @@ def check_samplesheet(file_in, file_out):

     FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
     FA_EXTENSIONS = (
-        ".fa",
         ".fa.gz",
-        ".fasta",
         ".fasta.gz",
-        ".fna",
         ".fna.gz",
-        ".fas",
         ".fas.gz",
     )
     INSTRUMENT_PLATFORMS = [

diff --git a/docs/usage.md b/docs/usage.md
index 685c3e3..4f3e038 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -12,7 +12,7 @@

 nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).

-> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
+> ⚠️ Input FASTQ and FASTA files _must_ be gzipped

 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 7fd0072..672c0e8 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -18,23 +18,20 @@ workflow INPUT_CHECK {
             fastq: true
         }

-    parsed_samplesheet.fastq
+    fastq = parsed_samplesheet.fastq
         .map { create_fastq_channel(it) }
-        .set { fastq }

-    parsed_samplesheet.nanopore
+    nanopore = parsed_samplesheet.nanopore
         .map { create_fastq_channel(it) }
-        .set { nanopore }

-    parsed_samplesheet.fasta
+    fasta = parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }
-        .set { fasta }

     emit:
     fastq    = fastq    ?: []    // channel: [ val(meta), [ reads ] ]
     nanopore = nanopore ?: []    // channel: [ val(meta), [ reads ] ]
     fasta    = fasta    ?: []    // channel: [ val(meta), fasta ]
-    versions = SAMPLESHEET_CHECK.out.versions.first() // channel: [ versions.yml ]
+    versions = SAMPLESHEET_CHECK.out.versions.first() // channel: [ versions.yml ]
 }
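The review suggestions applied in PATCH 05 below validate the raw parsed row before subsetting it down to the six known columns via `header_locs`, so sheets with extra or reordered columns still pass the length checks. The subsetting mechanism itself, sketched with hypothetical data:

```python
HEADER = ["sample", "run_accession", "instrument_platform", "fastq_1", "fastq_2", "fasta"]

# An input sheet with an extra column and a different column order:
header = ["run_accession", "sample", "notes", "instrument_platform", "fastq_1", "fastq_2", "fasta"]
header_locs = {name: header.index(name) for name in HEADER}

line_parsed = ["ERR5766176", "2612", "extra", "ILLUMINA", "r_1.fastq.gz", "r_2.fastq.gz", ""]
lspl = [line_parsed[i] for i in header_locs.values()]
print(lspl)  # ['2612', 'ERR5766176', 'ILLUMINA', 'r_1.fastq.gz', 'r_2.fastq.gz', '']
```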
From 41063bc6703f044d59cc20f601cbbc419bf10e5c Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Tue, 20 Dec 2022 13:56:01 +0100
Subject: [PATCH 05/11] Apply suggestions from code review

Co-authored-by: Moritz E. Beber
---
 bin/check_samplesheet.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index a45cb93..9bc84cc 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -104,22 +104,23 @@ def check_samplesheet(file_in, file_out):

             ## Pull out only relevant columns for downstream checking
             line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
-            lspl = [line_parsed[i] for i in header_locs.values()]

             # Check valid number of columns per row
-            if len(lspl) < len(HEADER):
+            if len(line_parsed) < len(HEADER):
                 print_error(
                     "Invalid number of columns (minimum = {})!".format(len(HEADER)),
                     "Line",
                     line,
                 )
-            num_cols = len([x for x in lspl if x])
+            num_cols = len([x for x in line_parsed if x])
             if num_cols < MIN_COLS:
                 print_error(
                     "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
                     "Line",
                     line,
                 )
+
+            lspl = [line_parsed[i] for i in header_locs.values()]

             ## Check sample name entries

@@ -169,7 +170,7 @@ def check_samplesheet(file_in, file_out):
             else:
                 if instrument_platform not in INSTRUMENT_PLATFORMS:
                     print_error(
-                        f"Instrument platform {instrument_platform} is not supported!",
+                        f"Instrument platform {instrument_platform} is not supported! "
                         f"List of supported platforms {', '.join(INSTRUMENT_PLATFORMS)}",
                         "Line",
                         line,

From 577da7e835eda533640a4c8f6521a12babdb4a7c Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Tue, 20 Dec 2022 13:59:14 +0100
Subject: [PATCH 06/11] Remove .first() as the module only executes once anyway
 and we get an ugly warning

---
 subworkflows/local/input_check.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 672c0e8..57061f1 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -31,7 +31,7 @@ workflow INPUT_CHECK {
     fastq    = fastq    ?: []    // channel: [ val(meta), [ reads ] ]
     nanopore = nanopore ?: []    // channel: [ val(meta), [ reads ] ]
     fasta    = fasta    ?: []    // channel: [ val(meta), fasta ]
-    versions = SAMPLESHEET_CHECK.out.versions.first() // channel: [ versions.yml ]
+    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
 }

 // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]

From 656956b0f9a8bb04e31e8f74d4c7ac0380269fde Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Tue, 20 Dec 2022 14:08:42 +0100
Subject: [PATCH 07/11] Black formatting

---
 bin/check_samplesheet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 9bc84cc..f5b0e6a 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -119,7 +119,7 @@ def check_samplesheet(file_in, file_out):
                     "Line",
                     line,
                 )
-
+
             lspl = [line_parsed[i] for i in header_locs.values()]

             ## Check sample name entries

From 24a8da2086151c21a1cdfeda9575b898fa9371ef Mon Sep 17 00:00:00 2001
From: Sofia Stamouli
Date: Tue, 20 Dec 2022 19:59:55 +0100
Subject: [PATCH 08/11] Fix linting

---
 nextflow_schema.json | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 0c7ce3d..c608d90 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -712,6 +712,15 @@
             "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
             "hidden": true
         },
+        "fasta": {
+            "type": "string",
+            "description": "Path to FASTA genome file.",
+            "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
+            "pattern": "^\\\\S+\\\\.fn?a(sta)?(\\\\.gz)?$",
+            "format": "file-path",
+            "mimetype": "text/plain",
+            "fa_icon": "far fa-file-code"
+        },
         "igenomes_base": {
             "type": "string",
             "format": "directory-path",

From 612f532fb24a52c46261c45151133928ee2939c6 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Tue, 20 Dec 2022 20:08:12 +0100
Subject: [PATCH 09/11] Bump KrakenUniq version

---
 modules.json                                           | 2 +-
 modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules.json b/modules.json
index 0a43696..134caa0 100644
--- a/modules.json
+++ b/modules.json
@@ -118,7 +118,7 @@
         },
         "krakenuniq/preloadedkrakenuniq": {
             "branch": "master",
-            "git_sha": "05649975c6611c6e007537a7984e186e12ae03af",
+            "git_sha": "a6eb17f65b3ee5761c25c075a6166c9f76733cee",
             "installed_by": ["modules"]
         },
         "krona/ktimporttaxonomy": {

diff --git a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
index 0ecacee..0cb402f 100644
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
@@ -2,10 +2,10 @@ process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
     tag "$meta.id"
     label 'process_high'

-    conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null)
+    conda "bioconda::krakenuniq=1.0.2"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0':
-        'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }"
+        'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.2--pl5321h19e8d03_0':
+        'quay.io/biocontainers/krakenuniq:1.0.2--pl5321h19e8d03_0' }"

     input:
     tuple val(meta), path(fastqs)

From 1706555eff392e5d4caf7a8fbe11a7fd317402e7 Mon Sep 17 00:00:00 2001
From: Sofia Stamouli
Date: Tue, 20 Dec 2022 20:10:05 +0100
Subject: [PATCH 10/11] Prettier

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index c608d90..6d48671 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -719,7 +719,7 @@
             "pattern": "^\\\\S+\\\\.fn?a(sta)?(\\\\.gz)?$",
             "format": "file-path",
             "mimetype": "text/plain",
-            "fa_icon": "far fa-file-code"
+            "fa_icon": "far fa-file-code"
         },
         "igenomes_base": {
             "type": "string",
             "format": "directory-path",

From 1279437aa105f8fdf5feb73f57ccf66aa06b1702 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Tue, 20 Dec 2022 21:48:08 +0100
Subject: [PATCH 11/11] Apply suggestions from code review

---
 nextflow.config      | 2 +-
 nextflow_schema.json | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index acfe2a0..f5ab538 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,7 +38,7 @@ params {
     version                    = false
     validate_params            = true
     show_hidden_params         = false
-    schema_ignore_params       = 'genomes'
+    schema_ignore_params       = 'genomes,fasta'

     // Config options

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6d48671..0c7ce3d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -712,15 +712,6 @@
             "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
             "hidden": true
         },
-        "fasta": {
-            "type": "string",
-            "description": "Path to FASTA genome file.",
-            "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-            "pattern": "^\\\\S+\\\\.fn?a(sta)?(\\\\.gz)?$",
-            "format": "file-path",
-            "mimetype": "text/plain",
-            "fa_icon": "far fa-file-code"
-        },
         "igenomes_base": {
             "type": "string",
             "format": "directory-path",
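With the series applied, downstream steps consume the `samplesheet.valid.csv` written by the checker, one row per run with the derived `single_end` column. A short sketch of reading that file back and grouping run accessions per sample, as `--perform_runmerging` later relies on (hypothetical file contents; column names from `HEADER_OUT` above):

```python
import csv
from collections import defaultdict

runs_per_sample = defaultdict(list)
with open("samplesheet.valid.csv") as handle:
    for row in csv.DictReader(handle):
        # Columns: sample, run_accession, instrument_platform,
        # single_end, fastq_1, fastq_2, fasta
        runs_per_sample[row["sample"]].append(row["run_accession"])

for sample, runs in sorted(runs_per_sample.items()):
    print(f"{sample}: {len(runs)} run(s) -> {', '.join(runs)}")
```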