
Add pep_input_base_dir to workflow. Just need to update modules

Rafal Stepien 2022-09-15 12:31:49 -04:00
parent 5f3eee9a4a
commit 768f9980d8
5 changed files with 17 additions and 15 deletions

View file

@@ -67,16 +67,14 @@ On release, automated continuous integration tests run the pipeline on a full-si
 > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
 > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
-4. You can also run the pipeline using PEP format as an input by running the following command:
-   ```console
-   nextflow run main.nf -profile test_pep,docker --outdir <OUTDIR>
-   ```
-5. Start running your own analysis!
+4. Start running your own analysis!
    ```console
    nextflow run nf-core/taxprofiler --input samplesheet.csv --databases database.csv --outdir <OUTDIR> --run_<TOOL1> --run_<TOOL2> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```
+   Note that the pipeline supports both CSV and PEP input sample sheets. Find out more [here](http://pep.databio.org/en/2.1.0/specification/).

 ## Documentation

 The nf-core/taxprofiler pipeline comes with documentation about the pipeline [usage](https://nf-co.re/taxprofiler/usage), [parameters](https://nf-co.re/taxprofiler/parameters) and [output](https://nf-co.re/taxprofiler/output).
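For context on the PEP note added above, launching the pipeline from a PEP configuration would look roughly like this (a sketch only: the `--pep` parameter is taken from the workflow change later in this commit, and all paths are illustrative):

```console
nextflow run nf-core/taxprofiler --pep /abs/path/to/pep/config.yaml --databases database.csv --outdir <OUTDIR> -profile docker
```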

View file

@@ -33,15 +33,15 @@ properties:
           ]
         fastq1:
           type: ["string", "null"]
-          description: "FASTQ file for read 1."
+          description: "Optional FASTQ file for read 1 of paired-end sequenced libraries."
           pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
         fastq2:
           type: ["string", "null"]
-          description: "FASTQ file for read 2."
+          description: "Optional FASTQ file for read 2 of paired-end sequenced libraries."
           pattern: "^[\\S]+.(fq\\.gz|fastq\\.gz)$"
         fasta:
           type: ["string", "null"]
-          description: "Path to FASTA file."
+          description: "Optional FASTA file."
           pattern: "^[\\S]+.(fa\\.gz|fasta\\.gz)$"
       required:
         - sample
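To make the schema concrete, here is an illustrative sample table that satisfies the three properties shown in this hunk (the columns are trimmed to those fields; a real taxprofiler sheet may carry additional columns):

```csv
sample,fastq1,fastq2,fasta
sample1,/data/sample1_R1.fastq.gz,/data/sample1_R2.fastq.gz,
sample2,,,/data/sample2.fasta.gz
```

Since each field is typed `["string", "null"]`, `fastq1`, `fastq2` and `fasta` may each be left empty; only `sample` is required.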

View file

@@ -22,6 +22,10 @@ This samplesheet is then specified on the command line as follows:
 --input '[path to samplesheet file]' --databases '[path to database sheet file]'
 ```
+Note that the pipeline supports both CSV and PEP input sample sheets. Find out more [here](http://pep.databio.org/en/2.1.0/specification/).
+
+When using PEP as input, the `samplesheet.csv` must be placed in the same folder as the `config.yaml` file. The path to `samplesheet.csv` within the config must be absolute.
+
 ### Multiple runs of the same sample

 The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the FASTQ files from different runs of the same sample before performing profiling, when `--perform_runmerging` is supplied. Below is an example for the same sample sequenced across 3 lanes:
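To illustrate the placement rule added above, a sketch of a PEP config (the `pep_version` and `sample_table` keys follow the PEP 2.1.0 specification linked earlier; the absolute path is illustrative):

```yaml
# /abs/path/to/pep/config.yaml -- lives in the same folder as samplesheet.csv
pep_version: 2.1.0
sample_table: /abs/path/to/pep/samplesheet.csv # must be an absolute path
```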

View file

@@ -9,11 +9,11 @@ include { EIDO_CONVERT } from '../../modules/nf-core/modules/eido/convert/main'

 workflow INPUT_CHECK {
     take:
     samplesheet_or_pep_config // file: /path/to/samplesheet.csv or /path/to/pep/config.yaml
-    base_dir // file: path to PEP directory
+    ch_pep_input_base_dir // channel: path to the directory containing the PEP config, or [] for CSV/remote input

     main:
-    EIDO_VALIDATE ( samplesheet_or_pep_config, file("$projectDir/assets/samplesheet_schema.yaml") )
-    converted_samplesheet = EIDO_CONVERT ( samplesheet_or_pep_config, "csv" )
+    EIDO_VALIDATE ( samplesheet_or_pep_config, file("$projectDir/assets/samplesheet_schema.yaml"), ch_pep_input_base_dir )
+    converted_samplesheet = EIDO_CONVERT ( samplesheet_or_pep_config, "csv", ch_pep_input_base_dir )
     parsed_samplesheet = SAMPLESHEET_CHECK ( converted_samplesheet.samplesheet_converted )
         .csv
         .splitCsv ( header:true, sep:',' )
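The commit message says the modules still need updating to accept this third argument. A hypothetical sketch of what that change to the `eido/validate` module could look like (the input names mirror this diff; the real nf-core module definition and its `eido` command line may differ):

```nextflow
process EIDO_VALIDATE {
    input:
    path samplesheet_or_pep_config
    path schema
    path pep_input_base_dir // assumed new input: staged so relative paths inside config.yaml resolve

    output:
    path "validation.log", emit: log

    script:
    """
    eido validate $samplesheet_or_pep_config -s $schema -e > validation.log
    """
}
```

`EIDO_CONVERT` would need the same extra `path` input to match the updated calls above.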

View file

@@ -19,18 +19,18 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true

 // Check mandatory parameters
 if (params.input) {
     ch_input = file(params.input)
-    ch_input_basedir = []
+    ch_pep_input_base_dir = []
 } else if (params.pep) {
     if ( params.pep.startsWith("http://") || params.pep.startsWith("https://") ) {
         ch_input = file(params.pep)
-        ch_input_basedir = []
+        ch_pep_input_base_dir = []
     }
     else {
         ch_input = file(params.pep)
-        ch_input_basedir = new File(params.pep).getParent()
+        ch_pep_input_base_dir = new File(params.pep).getParent()
     }
 } else {
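A worked illustration of the branching above (the path is an assumed example): a local PEP yields the parent directory of its config as the base dir, while a CSV samplesheet or a remote PEP leaves it empty.

```groovy
def pep = '/data/project/pep/config.yaml' // hypothetical value of params.pep
assert new File(pep).getParent() == '/data/project/pep' // local PEP -> base dir
// --input samplesheet.csv, or an http(s):// PEP, instead leaves ch_pep_input_base_dir = []
```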
@@ -117,7 +117,7 @@ workflow TAXPROFILER {
     /*
         SUBWORKFLOW: Read in samplesheet, validate and stage input files
     */
     INPUT_CHECK (
-        ch_input, ch_input_basedir
+        ch_input, ch_pep_input_base_dir
     )
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)