diff --git a/conf/modules.config b/conf/modules.config index 397e0c2..f21d1ae 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,7 +62,7 @@ process { ext.args = [ // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", // filtering options "--length_required ${params.shortread_qc_minlength}", (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' @@ -82,8 +82,8 @@ process { params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", - params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", // filtering options "--length_required ${params.shortread_qc_minlength}", params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' @@ -100,8 +100,7 @@ process { withName: ADAPTERREMOVAL_SINGLE { ext.args = [ // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? 
"--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() @@ -119,9 +118,8 @@ process { // collapsing options params.shortread_qc_mergepairs ? "--collapse" : "", // trimming options - params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", - params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", - params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level + params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? 
"--adapter2 ${params.shortread_qc_adapter2}" : "", // filtering options "--minlength ${params.shortread_qc_minlength}" ].join(' ').trim() diff --git a/modules.json b/modules.json index be3d193..7eb6fb5 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "adapterremoval": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659" }, "bbmap/bbduk": { "branch": "master", @@ -59,7 +59,7 @@ }, "fastp": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e" }, "fastqc": { "branch": "master", diff --git a/modules/nf-core/adapterremoval/main.nf b/modules/nf-core/adapterremoval/main.nf index 0e17c05..643c141 100644 --- a/modules/nf-core/adapterremoval/main.nf +++ b/modules/nf-core/adapterremoval/main.nf @@ -34,7 +34,7 @@ process ADAPTERREMOVAL { AdapterRemoval \\ --file1 $reads \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads ${task.cpus} \\ --seed 42 \\ @@ -61,7 +61,7 @@ process ADAPTERREMOVAL { --file1 ${reads[0]} \\ --file2 ${reads[1]} \\ $args \\ - $adapterlist \\ + $list \\ --basename ${prefix} \\ --threads $task.cpus \\ --seed 42 \\ diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 11ea4db..207258a 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -9,6 +9,7 @@ process FASTP { input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -27,6 +28,7 @@ process FASTP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? 
"--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. @@ -40,6 +42,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log \\ @@ -61,6 +64,7 @@ process FASTP { --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log @@ -82,6 +86,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 2368fde..6f6fad7 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -23,6 +23,10 @@ input: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. If you wish to run interleaved paired-end data, supply as single-end data but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. 
+ pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/nextflow.config b/nextflow.config index b369f31..c59d433 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,9 +59,9 @@ params { // Databases databases = null + // FASTQ preprocessing preprocessing_qc_tool = 'fastqc' - // FASTQ preprocessing perform_shortread_qc = false shortread_qc_tool = 'fastp' shortread_qc_skipadaptertrim = false @@ -69,6 +69,7 @@ params { shortread_qc_excludeunmerged = false shortread_qc_adapter1 = null shortread_qc_adapter2 = null + shortread_qc_adapterlist = null shortread_qc_minlength = 15 perform_longread_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 21e7e96..a66b0f1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,6 +104,12 @@ "description": "Specify adapter 2 nucleotide sequence", "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" }, + "shortread_qc_adapterlist": { + "type": "string", + "default": "None", + "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta (fastp).", + "help_text": "Allows you to supply a file with a list of adapters (or adapter combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters. \n\nFor AdapterRemoval this consists of a two-column table with a `.txt` extension: the first column represents the forward strand, the second column the reverse strand. 
You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta" + }, "shortread_qc_mergepairs": { "type": "boolean", "fa_icon": "fas fa-toggle-on", diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a5a43fe..6567007 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { take: reads // [[meta], [reads]] + adapterlist // file main: ch_versions = Channel.empty() @@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL { paired: !it[0].single_end } - ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) - ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist ) + ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist ) /* * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index d466041..cac5a27 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main' workflow SHORTREAD_FASTP { take: reads // [[meta], [reads]] + adapterlist main: ch_versions = Channel.empty() @@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP { paired: it[0]['single_end'] == false } - FASTP_SINGLE ( ch_input_for_fastp.single, 
false, false ) + FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false ) // Last parameter here turns on merging of PE data - FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs ) + FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs ) if ( params.shortread_qc_mergepairs ) { ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f2cd738..c823e3d 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -11,17 +11,18 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main' workflow SHORTREAD_PREPROCESSING { take: reads // [ [ meta ], [ reads ] ] + adapterlist // file main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() if ( params.shortread_qc_tool == "fastp" ) { - ch_processed_reads = SHORTREAD_FASTP ( reads ).reads + ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_qc_tool == "adapterremoval" ) { - ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) } else { diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 07660ab..ba52a4f 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist def checkPathParamList = [ params.input, params.databases, 
params.hostremoval_reference, - params.shortread_hostremoval_index, params.multiqc_config + params.shortread_hostremoval_index, params.multiqc_config, + params.shortread_qc_adapterlist ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -103,6 +104,12 @@ workflow TAXPROFILER { ch_versions = Channel.empty() ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") + adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + + if ( params.shortread_qc_adapterlist ) { + if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches("fa|fasta|fna|fas") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + } /* SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -132,8 +139,9 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ + if ( params.perform_shortread_qc ) { - ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq