1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 23:53:10 +00:00

Merge pull request #158 from nf-core/adapterremoval-fix

Fix adapterremoval adapter conflict and add optional adapter list file
This commit is contained in:
James A. Fellows Yates 2022-11-03 11:36:14 +01:00 committed by GitHub
commit 1f31f88635
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 46 additions and 21 deletions

View file

@ -62,7 +62,7 @@ process {
ext.args = [ ext.args = [
// trimming options // trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options // filtering options
"--length_required ${params.shortread_qc_minlength}", "--length_required ${params.shortread_qc_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -82,8 +82,8 @@ process {
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged", params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
// trimming options // trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options // filtering options
"--length_required ${params.shortread_qc_minlength}", "--length_required ${params.shortread_qc_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : '' params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -100,8 +100,7 @@ process {
withName: ADAPTERREMOVAL_SINGLE { withName: ADAPTERREMOVAL_SINGLE {
ext.args = [ ext.args = [
// trimming options // trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
// filtering options // filtering options
"--minlength ${params.shortread_qc_minlength}" "--minlength ${params.shortread_qc_minlength}"
].join(' ').trim() ].join(' ').trim()
@ -119,9 +118,8 @@ process {
// collapsing options // collapsing options
params.shortread_qc_mergepairs ? "--collapse" : "", params.shortread_qc_mergepairs ? "--collapse" : "",
// trimming options // trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
// filtering options // filtering options
"--minlength ${params.shortread_qc_minlength}" "--minlength ${params.shortread_qc_minlength}"
].join(' ').trim() ].join(' ').trim()

View file

@ -7,7 +7,7 @@
"nf-core": { "nf-core": {
"adapterremoval": { "adapterremoval": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659"
}, },
"bbmap/bbduk": { "bbmap/bbduk": {
"branch": "master", "branch": "master",
@ -59,7 +59,7 @@
}, },
"fastp": { "fastp": {
"branch": "master", "branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e"
}, },
"fastqc": { "fastqc": {
"branch": "master", "branch": "master",

View file

@ -34,7 +34,7 @@ process ADAPTERREMOVAL {
AdapterRemoval \\ AdapterRemoval \\
--file1 $reads \\ --file1 $reads \\
$args \\ $args \\
$adapterlist \\ $list \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads ${task.cpus} \\ --threads ${task.cpus} \\
--seed 42 \\ --seed 42 \\
@ -61,7 +61,7 @@ process ADAPTERREMOVAL {
--file1 ${reads[0]} \\ --file1 ${reads[0]} \\
--file2 ${reads[1]} \\ --file2 ${reads[1]} \\
$args \\ $args \\
$adapterlist \\ $list \\
--basename ${prefix} \\ --basename ${prefix} \\
--threads $task.cpus \\ --threads $task.cpus \\
--seed 42 \\ --seed 42 \\

View file

@ -9,6 +9,7 @@ process FASTP {
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
path adapter_fasta
val save_trimmed_fail val save_trimmed_fail
val save_merged val save_merged
@ -27,6 +28,7 @@ process FASTP {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC // Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config. // Use single ended for interleaved. Add --interleaved_in in config.
@ -40,6 +42,7 @@ process FASTP {
--thread $task.cpus \\ --thread $task.cpus \\
--json ${prefix}.fastp.json \\ --json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\ --html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\ $fail_fastq \\
$args \\ $args \\
2> ${prefix}.fastp.log \\ 2> ${prefix}.fastp.log \\
@ -61,6 +64,7 @@ process FASTP {
--thread $task.cpus \\ --thread $task.cpus \\
--json ${prefix}.fastp.json \\ --json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\ --html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\ $fail_fastq \\
$args \\ $args \\
2> ${prefix}.fastp.log 2> ${prefix}.fastp.log
@ -82,6 +86,7 @@ process FASTP {
--out2 ${prefix}_2.fastp.fastq.gz \\ --out2 ${prefix}_2.fastp.fastq.gz \\
--json ${prefix}.fastp.json \\ --json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\ --html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\ $fail_fastq \\
$merge_fastq \\ $merge_fastq \\
--thread $task.cpus \\ --thread $task.cpus \\

View file

@ -23,6 +23,10 @@ input:
List of input FastQ files of size 1 and 2 for single-end and paired-end data, List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively. If you wish to run interleaved paired-end data, supply as single-end data respectively. If you wish to run interleaved paired-end data, supply as single-end data
but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
- adapter_fasta:
type: file
description: File in FASTA format containing possible adapters to remove.
pattern: "*.{fasta,fna,fas,fa}"
- save_trimmed_fail: - save_trimmed_fail:
type: boolean type: boolean
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`

View file

@ -59,9 +59,9 @@ params {
// Databases // Databases
databases = null databases = null
// FASTQ preprocessing
preprocessing_qc_tool = 'fastqc' preprocessing_qc_tool = 'fastqc'
// FASTQ preprocessing
perform_shortread_qc = false perform_shortread_qc = false
shortread_qc_tool = 'fastp' shortread_qc_tool = 'fastp'
shortread_qc_skipadaptertrim = false shortread_qc_skipadaptertrim = false
@ -69,6 +69,7 @@ params {
shortread_qc_excludeunmerged = false shortread_qc_excludeunmerged = false
shortread_qc_adapter1 = null shortread_qc_adapter1 = null
shortread_qc_adapter2 = null shortread_qc_adapter2 = null
shortread_qc_adapterlist = null
shortread_qc_minlength = 15 shortread_qc_minlength = 15
perform_longread_qc = false perform_longread_qc = false

View file

@ -104,6 +104,12 @@
"description": "Specify adapter 2 nucleotide sequence", "description": "Specify adapter 2 nucleotide sequence",
"help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence_r2`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter2`. AdapterRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence_r2`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter2`. AdapterRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`"
}, },
"shortread_qc_adapterlist": {
"type": "string",
"default": "None",
"description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta (fastp).",
"help_text": "Allows you to supply a file with a list of adapters (or adapter combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters. \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta"
},
"shortread_qc_mergepairs": { "shortread_qc_mergepairs": {
"type": "boolean", "type": "boolean",
"fa_icon": "fas fa-toggle-on", "fa_icon": "fas fa-toggle-on",

View file

@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
take: take:
reads // [[meta], [reads]] reads // [[meta], [reads]]
adapterlist // file
main: main:
ch_versions = Channel.empty() ch_versions = Channel.empty()
@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
paired: !it[0].single_end paired: !it[0].single_end
} }
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist )
/* /*
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file

View file

@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main'
workflow SHORTREAD_FASTP { workflow SHORTREAD_FASTP {
take: take:
reads // [[meta], [reads]] reads // [[meta], [reads]]
adapterlist
main: main:
ch_versions = Channel.empty() ch_versions = Channel.empty()
@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP {
paired: it[0]['single_end'] == false paired: it[0]['single_end'] == false
} }
FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false )
// Last parameter here turns on merging of PE data // Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs ) FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs )
if ( params.shortread_qc_mergepairs ) { if ( params.shortread_qc_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged

View file

@ -11,17 +11,18 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
workflow SHORTREAD_PREPROCESSING { workflow SHORTREAD_PREPROCESSING {
take: take:
reads // [ [ meta ], [ reads ] ] reads // [ [ meta ], [ reads ] ]
adapterlist // file
main: main:
ch_versions = Channel.empty() ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty() ch_multiqc_files = Channel.empty()
if ( params.shortread_qc_tool == "fastp" ) { if ( params.shortread_qc_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_qc_tool == "adapterremoval" ) { } else if ( params.shortread_qc_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else { } else {

View file

@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log)
// TODO nf-core: Add all file path parameters for the pipeline to the list below // TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist // Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference, def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
params.shortread_hostremoval_index, params.multiqc_config params.shortread_hostremoval_index, params.multiqc_config,
params.shortread_qc_adapterlist
] ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@ -103,6 +104,12 @@ workflow TAXPROFILER {
ch_versions = Channel.empty() ch_versions = Channel.empty()
ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png") ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")
adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : []
if ( params.shortread_qc_adapterlist ) {
if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
}
/* /*
SUBWORKFLOW: Read in samplesheet, validate and stage input files SUBWORKFLOW: Read in samplesheet, validate and stage input files
@ -132,8 +139,9 @@ workflow TAXPROFILER {
/* /*
SUBWORKFLOW: PERFORM PREPROCESSING SUBWORKFLOW: PERFORM PREPROCESSING
*/ */
if ( params.perform_shortread_qc ) { if ( params.perform_shortread_qc ) {
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
} else { } else {
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq ch_shortreads_preprocessed = INPUT_CHECK.out.fastq