Start working on adding adapterremoval

2024-11-21 21:56:05 +00:00 · 2022-03-31 15:31:45 +02:00 · 2022-03-31 15:31:45 +02:00 · c4c93bd59d
commit c4c93bd59d
parent 323883bd3e
7 changed files with 281 additions and 14 deletions
--- a/modules.json
+++ b/modules.json
@ -3,6 +3,9 @@
    "homePage": "https://github.com/nf-core/taxprofiler",
    "repos": {
        "nf-core/modules": {
+            "adapterremoval": {
+                "git_sha": "f0800157544a82ae222931764483331a81812012"
+            },
            "cat/fastq": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
@ -32,4 +35,4 @@
            }
        }
    }
-}
+}
--- a/modules/nf-core/modules/adapterremoval/main.nf
+++ b/modules/nf-core/modules/adapterremoval/main.nf
@ -0,0 +1,70 @@
+process ADAPTERREMOVAL { // Runs AdapterRemoval on one sample's single- or paired-end FASTQ reads
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
+        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
+
+    input:
+    tuple val(meta), path(reads) // meta.id names the outputs; meta.single_end selects the command below
+    path(adapterlist)            // optional adapter list file; an empty/falsy value adds no flag
+
+    output: // every FASTQ output is optional; only the log and versions.yml are always expected
+    tuple val(meta), path("${prefix}.truncated.gz")            , optional: true, emit: singles_truncated
+    tuple val(meta), path("${prefix}.discarded.gz")            , optional: true, emit: discarded
+    tuple val(meta), path("${prefix}.pair1.truncated.gz")      , optional: true, emit: pair1_truncated
+    tuple val(meta), path("${prefix}.pair2.truncated.gz")      , optional: true, emit: pair2_truncated
+    tuple val(meta), path("${prefix}.collapsed.gz")            , optional: true, emit: collapsed
+    tuple val(meta), path("${prefix}.collapsed.truncated.gz")  , optional: true, emit: collapsed_truncated
+    tuple val(meta), path("${prefix}.paired.gz")               , optional: true, emit: paired_interleaved
+    tuple val(meta), path('*.log')                             , emit: log
+    path "versions.yml"                                        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                                 // extra CLI options supplied via ext.args
+    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""  // flag + file, or nothing when no list is given
+    prefix = task.ext.prefix ?: "${meta.id}"                       // left without `def`, presumably so the output: patterns can resolve ${prefix}
+
+    if (meta.single_end) { // single-end: one input file via --file1
+        """
+        AdapterRemoval  \\
+            --file1 $reads \\
+            $args \\
+            $list \\
+            --basename ${prefix} \\
+            --threads ${task.cpus} \\
+            --settings ${prefix}.log \\
+            --seed 42 \\
+            --gzip
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
+        END_VERSIONS
+        """
+    } else { // paired-end: reads[0] and reads[1] are passed as --file1 / --file2
+        """
+        AdapterRemoval  \\
+            --file1 ${reads[0]} \\
+            --file2 ${reads[1]} \\
+            $args \\
+            $list \\
+            --basename ${prefix} \\
+            --threads $task.cpus \\
+            --settings ${prefix}.log \\
+            --seed 42 \\
+            --gzip
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
+        END_VERSIONS
+        """
+    }
+
+}
--- a/modules/nf-core/modules/adapterremoval/meta.yml
+++ b/modules/nf-core/modules/adapterremoval/meta.yml
@ -0,0 +1,90 @@
+name: adapterremoval
+description: Trim sequencing adapters and collapse overlapping reads
+keywords:
+  - trimming
+  - adapters
+  - merging
+  - fastq
+tools:
+  - adapterremoval:
+      description: The AdapterRemoval v2 tool for merging and clipping reads.
+      homepage: https://github.com/MikkelSchubert/adapterremoval
+      documentation: https://adapterremoval.readthedocs.io
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+  - adapterlist:
+      type: file
+      description: Optional text file containing a list of adapters to look for and remove,
+        with one adapter per line. Otherwise will look for default adapters (see
+        AdapterRemoval man page), or can be modified to remove user-specified
+        adapters via ext.args.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - singles_truncated:
+      type: file
+      description: |
+        Adapter trimmed FastQ files of either single-end reads, or singleton
+        'orphaned' reads from merging of paired-end data (i.e., one of the pair
+        was lost due to filtering thresholds).
+      pattern: "*.truncated.gz"
+  - discarded:
+      type: file
+      description: |
+        Adapter trimmed FastQ files of reads that did not pass filtering
+        thresholds.
+      pattern: "*.discarded.gz"
+  - pair1_truncated:
+      type: file
+      description: |
+        Adapter trimmed R1 FastQ files of paired-end reads that did not merge
+        with their respective R2 pair due to long templates. The respective pair
+        is stored in 'pair2_truncated'.
+      pattern: "*.pair1.truncated.gz"
+  - pair2_truncated:
+      type: file
+      description: |
+        Adapter trimmed R2 FastQ files of paired-end reads that did not merge
+        with their respective R1 pair due to long templates. The respective pair
+        is stored in 'pair1_truncated'.
+      pattern: "*.pair2.truncated.gz"
+  - collapsed:
+      type: file
+      description: |
+        Collapsed FastQ of paired-end reads that successfully merged with their
+        respective R1 pair but were not trimmed.
+      pattern: "*.collapsed.gz"
+  - collapsed_truncated:
+      type: file
+      description: |
+        Collapsed FastQ of paired-end reads that successfully merged with their
+        respective R1 pair and were trimmed of adapter due to sufficient overlap.
+      pattern: "*.collapsed.truncated.gz"
+  - log:
+      type: file
+      description: AdapterRemoval log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@maxibor"
+  - "@jfy133"
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -10,7 +10,10 @@
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": [
+                "input",
+                "outdir"
+            ],
            "properties": {
                "input": {
                    "type": "string",
@ -173,7 +176,14 @@
                    "description": "Method used to save pipeline results to output directory.",
                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                    "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                    "hidden": true
                },
                "email_on_fail": {
@ -284,7 +294,11 @@
        },
        "shortread_clipmerge_tool": {
            "type": "string",
-            "default": "fastp"
+            "default": "fastp",
+            "enum": [
+                "fastp",
+                "adapterremoval"
+            ]
        },
        "shortread_clipmerge_skipadaptertrim": {
            "type": "boolean"
@ -294,15 +308,15 @@
        },
        "shortread_clipmerge_adapter1": {
            "type": "string",
-            "default": null
+            "default": "None"
        },
        "shortread_clipmerge_adapter2": {
            "type": "string",
-            "default": null
+            "default": "None"
        },
        "shortread_clipmerge_minlength": {
            "type": "integer",
            "default": 15
        }
    }
-}
+}
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@ -0,0 +1,91 @@
+/*
+Process short raw reads with AdapterRemoval
+*/
+
+include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE       } from '../../modules/nf-core/modules/adapterremoval/main'
+include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED       } from '../../modules/nf-core/modules/adapterremoval/main'
+include { CAT_FASTQ                                     } from '../../modules/nf-core/modules/cat/fastq/main'
+
+workflow SHORTREAD_ADAPTERREMOVAL { // Runs AdapterRemoval per sample and emits one reads channel plus versions and logs
+
+    take:
+    reads // [[meta], [reads]]
+
+    main:
+    ch_versions = Channel.empty()
+    ch_multiqc_files      = Channel.empty()
+
+    ch_input_for_adapterremoval = reads
+                                    .dump(tag: "pre_adapterremoval_branch")
+                                    .branch{
+                                        single: it[0]['single_end'] == true  // strict boolean test: meta.single_end must be true/false
+                                        paired: it[0]['single_end'] == false
+                                    }
+
+    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) // [] = no adapter list file supplied
+    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
+
+    if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { // keep merged AND unmerged reads
+        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
+                                                .mix(
+                                                    ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
+                                                    ADAPTERREMOVAL_PAIRED.out.singles_truncated,
+                                                    ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
+                                                    ADAPTERREMOVAL_PAIRED.out.pair2_truncated
+                                                    )
+                                                .map {
+                                                    meta, reads ->
+                                                        def meta_new = meta.clone() // copy so the upstream meta map is not mutated
+                                                        meta_new['single_end'] = true // boolean, not 1: branches test `single_end == true`
+
+                                                        [ meta_new, reads ]
+                                                    }
+                                                    .groupTuple(by: 0)
+        ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads // concatenate all files grouped per sample
+
+        ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+
+    } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { // keep merged reads only
+            ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
+                                                    .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
+                                                    .map {
+                                                        meta, reads ->
+                                                            def meta_new = meta.clone() // copy so the upstream meta map is not mutated
+                                                            meta_new['single_end'] = true // boolean, not 1: branches test `single_end == true`
+
+                                                            [ meta_new, reads ]
+                                                        }
+                                                        .groupTuple(by: 0)
+
+            ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+
+            ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+
+    } else { // no merging: re-pair the trimmed R1/R2 files of each sample
+
+        ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
+                                                .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
+                                                .dump(tag: "pre-group")
+                                                .groupTuple(by: 0)
+                                                .dump(tag: "post-group")
+                                                .map { meta, pair1, pair2 -> // groupTuple wraps each file in a list, hence the flatten
+                                                        [ meta, [ pair1, pair2 ].flatten() ]
+                                                }
+                                                .dump(tag: "post-map")
+
+
+        ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe
+                                    .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+    }
+
+    ch_processed_reads = ch_adapterremoval_reads_prepped
+
+    ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) // one versions.yml per alias is enough
+    ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
+    ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )
+
+    emit:
+    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions          // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files     // channel: AdapterRemoval log files (meta stripped)
+}
--- a/subworkflows/local/shortread_fastp.nf
+++ b/subworkflows/local/shortread_fastp.nf
@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
    ch_multiqc_files      = Channel.empty()

    ch_input_for_fastp = reads
-                            .dump(tag: "pre-fastp_branch")
                            .branch{
                                single: it[0]['single_end'] == true
                                paired: it[0]['single_end'] == false
                            }

-    ch_input_for_fastp.single.dump(tag: "input_fastp_single")
-    ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
-
    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
    // Last parameter here turns on merging of PE data
    FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
    ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
    ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())

-    ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
+    ch_processed_reads = ch_fastp_reads_prepped

    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )

-    ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
-
    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
    versions = ch_versions          // channel: [ versions.yml ]
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@ -4,6 +4,7 @@


 include { SHORTREAD_FASTP             } from './shortread_fastp'
+include { SHORTREAD_ADAPTERREMOVAL    } from './shortread_adapterremoval'
 include { FASTQC as FASTQC_PROCESSED       } from '../../modules/nf-core/modules/fastqc/main'

 workflow SHORTREAD_PREPROCESSING {
@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
        ch_versions        =  ch_versions.mix( SHORTREAD_FASTP.out.versions )
        ch_multiqc_files   =  ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
+    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
+        ch_versions        =  ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
+        ch_multiqc_files   =  ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
    } else {
        ch_processed_reads = reads
    }