Merge branch 'dev' into hostremoval-publish-fix

2024-11-22 23:49:55 +00:00 · 2022-04-21 11:43:06 +02:00 · 2022-04-21 11:43:06 +02:00 · cd3c8f6a89
commit cd3c8f6a89
parent d16cd75652 e71e7b86a2
12 changed files with 159 additions and 17 deletions
--- a/README.md
+++ b/README.md
@ -44,6 +44,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
   - [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
   - [Kaiju](https://kaiju.binf.ku.dk/)
   - [mOTUs](https://motu-tool.org/)
   - [MetaMaps](https://github.com/DiltheyLab/MetaMaps)
 4. Perform optional post-processing with:
   - [bracken](https://ccb.jhu.edu/software/bracken/)
 5. Standardises output tables
--- a/conf/modules.config
+++ b/conf/modules.config
@ -251,7 +251,7 @@ process {
            pattern: '*.txt'
        ]
        ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
    }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@ -270,4 +270,13 @@ process {
        ]
    }
    withName: KAIJU_KAIJU {
        publishDir = [
            path: { "${params.outdir}/kaiju/${meta.db_name}" },
            mode: params.publish_dir_mode,
            pattern: '*.tsv'
        ]
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
    }
 }
--- a/conf/test.config
+++ b/conf/test.config
@ -22,15 +22,16 @@ params {
    // Input data
    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input                         = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
-    databases                     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
+    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
    run_kraken2                   = true
    run_malt                      = true
    run_metaphlan3                = true
    run_centrifuge                = true
    perform_shortread_clipmerge           = true
    perform_longread_clip                 = false
    perform_shortread_complexityfilter    = true
    perform_shortread_hostremoval         = true
    shortread_hostremoval_reference       = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    run_kaiju                             = true
    run_kraken2                           = true
    run_malt                              = true
    run_metaphlan3                        = true
    run_centrifuge                        = true
 }
--- a/docs/usage.md
+++ b/docs/usage.md
@ -124,6 +124,10 @@ Expected (uncompressed) database files for each tool are as follows:
  - `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
  - `mpa_latest`
 - **Kaiju** output of `kaiju-makedb`. A directory containing:
  - `kaiju_db_*.fmi`
  - `nodes.dmp`
  - `names.dmp`
 ## Running the pipeline
--- a/modules.json
+++ b/modules.json
@ -53,6 +53,9 @@
            },
            "untar": {
                "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
            },
            "kaiju/kaiju": {
                "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
            }
        }
    }
--- a/modules/nf-core/modules/kaiju/kaiju/main.nf
+++ b/modules/nf-core/modules/kaiju/kaiju/main.nf
@ -0,0 +1,41 @@
 // Runs Kaiju on one sample's reads against a database and writes a per-sample TSV of results,
 // plus a versions.yml recording the Kaiju version that was used.
 process KAIJU_KAIJU {
    // Label each task with the sample id and request the 'process_high' resource label.
    tag "$meta.id"
    label 'process_high'
    // Software environment: Kaiju 1.8.2 from bioconda when params.enable_conda is set; otherwise the
    // Galaxy depot Singularity image (when running under Singularity and
    // task.ext.singularity_pull_docker_container is not set) or the quay.io biocontainer.
    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
    input:
    // meta: sample metadata map (meta.id and meta.single_end are read below); reads: one or two read files.
    tuple val(meta), path(reads)
    // db: path that is searched with `find` for the nodes.dmp and *.fmi files.
    path(db)
    output:
    // results: every *.tsv produced in the task directory, paired with the sample's meta map.
    tuple val(meta), path('*.tsv'), emit: results
    path "versions.yml"           , emit: versions
    when:
    // Run unless task.ext.when is explicitly set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra command-line arguments and the output file prefix can be overridden through task.ext;
    // the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end data is passed with -i only; paired-end data passes the first file with -i and the second with -j.
    def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
    // The shell script below locates the nodes.dmp and .fmi files under the db path (following
    // symlinks, ignoring files named "._*" - presumably macOS metadata files, TODO confirm), runs
    // kaiju with task.cpus passed to -z, and derives the version from the first line of `kaiju -h`.
    """
    dbnodes=`find -L ${db} -name "*nodes.dmp"`
    dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
    kaiju \\
        $args \\
        -z $task.cpus \\
        -t \$dbnodes \\
        -f \$dbname \\
        -o ${prefix}.tsv \\
        $input
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
 }
--- a/modules/nf-core/modules/kaiju/kaiju/meta.yml
+++ b/modules/nf-core/modules/kaiju/kaiju/meta.yml
@ -0,0 +1,53 @@
 name: kaiju_kaiju
 description: Taxonomic classification of metagenomic sequence data using a protein reference database
 keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
 tools:
  - kaiju:
      description: Fast and sensitive taxonomic classification for metagenomics
      homepage: https://kaiju.binf.ku.dk/
      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
      doi: "10.1038/ncomms11257"
      licence: ["GNU GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
      pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
  - db:
      type: files
      description: |
        List containing the database and nodes files for Kaiju
        e.g. [ 'database.fmi', 'nodes.dmp' ]
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - results:
      type: file
      description: Results with taxonomic classification of each read
      pattern: "*.tsv"
 authors:
  - "@talnor"
  - "@sofstam"
  - "@jfy133"
--- a/nextflow.config
+++ b/nextflow.config
@ -104,6 +104,9 @@ params {
    // metaphlan3
    run_metaphlan3             = false
    // kaiju
    run_kaiju                  = false
 }
 // Load base.config by default for all pipelines
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -379,6 +379,9 @@
        "save_hostremoval_unmapped": {
            "type": "boolean"
        },
        "run_kaiju": {
            "type": "boolean"
        },
        "malt_generatemegansummary": {
            "type": "boolean"
        }
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@ -45,6 +45,7 @@ def create_fastq_channel(LinkedHashMap row) {
    meta.run_accession          = row.run_accession
    meta.instrument_platform    = row.instrument_platform
    meta.single_end             = row.single_end.toBoolean()
    meta.is_fasta               = false
    // add path(s) of the fastq file(s) to the meta map
    def fastq_meta = []
@ -75,6 +76,7 @@ def create_fasta_channel(LinkedHashMap row) {
    meta.run_accession          = row.run_accession
    meta.instrument_platform    = row.instrument_platform
    meta.single_end             = true
    meta.is_fasta               = true
    def array = []
    if (!file(row.fasta).exists()) {
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -7,6 +7,7 @@ include { MEGAN_RMA2INFO              } from '../../modules/nf-core/modules/mega
 include { KRAKEN2_KRAKEN2             } from '../../modules/nf-core/modules/kraken2/kraken2/main'
 include { CENTRIFUGE_CENTRIFUGE       } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
 include { METAPHLAN3                  } from '../../modules/nf-core/modules/metaphlan3/main'
 include { KAIJU_KAIJU                 } from '../../modules/nf-core/modules/kaiju/kaiju/main'
 workflow PROFILING {
    take:
@ -37,6 +38,7 @@ workflow PROFILING {
                kraken2: it[2]['tool'] == 'kraken2'
                metaphlan3: it[2]['tool'] == 'metaphlan3'
                centrifuge: it[2]['tool'] == 'centrifuge'
                kaiju: it[2]['tool'] == 'kaiju'
                unknown: true
            }
@ -77,13 +79,28 @@ workflow PROFILING {
                            }
    ch_input_for_centrifuge =  ch_input_for_profiling.centrifuge
-                                .multiMap {
+                            .filter{
-                                    it ->
+                                if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] Centrifuge currently does not accept FASTA files as input. Skipping Centrifuge for sample ${it[0].id}."
-                                        reads: [ it[0] + it[2], it[1] ]
+                                !it[0].is_fasta
-                                        db: it[3]
+                            }
-                                }
+                            .multiMap {
                                it ->
                                    reads: [ it[0] + it[2], it[1] ]
                                    db: it[3]
                            }
    ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
                            .filter{
                                if (it[0].is_fasta) log.warn "[nf-core/taxprofiler] MetaPhlAn3 currently does not accept FASTA files as input. Skipping MetaPhlAn3 for sample ${it[0].id}."
                                !it[0].is_fasta
                            }
                            .multiMap {
                                it ->
                                    reads: [it[0] + it[2], it[1]]
                                    db: it[3]
                            }
    ch_input_for_kaiju = ch_input_for_profiling.kaiju
                            .multiMap {
                                it ->
                                    reads: [it[0] + it[2], it[1]]
@ -135,6 +152,10 @@ workflow PROFILING {
        ch_raw_profiles    = ch_raw_profiles.mix( METAPHLAN3.out.biom )
    }
    if ( params.run_kaiju ) {
        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db )
        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
    }
    emit:
    profiles = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@ -19,11 +19,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
-if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
-if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
+if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
-if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
+// Host removal needs a reference FASTA; abort early if it was requested without one.
+// The message names the flag that is actually tested (--perform_shortread_hostremoval).
+if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --perform_shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
-if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
+// A pre-built index is unusable without its reference FASTA; abort early in that case.
+// The parameter checked here is `shortread_hostremoval_index`, the same name used in the error
+// message and when the index file is loaded further down, so the guard tests the value that is used.
+if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
 if (params.shortread_hostremoval_reference ) { ch_reference       = file(params.shortread_hostremoval_reference) }
 if (params.shortread_hostremoval_index     ) { ch_reference_index = file(params.shortread_hostremoval_index    ) } else { ch_reference_index = [] }
@ -175,10 +175,11 @@ workflow TAXPROFILER {
                meta, reads ->
                [ meta, [ reads ].flatten() ]
            }
            .mix( INPUT_CHECK.out.fasta )
    } else {
        ch_reads_runmerged = ch_shortreads_hostremoved
-            .mix( ch_longreads_preprocessed )
+            .mix( ch_longreads_preprocessed, INPUT_CHECK.out.fasta )
    }
    /*