1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 04:09:54 +00:00

Merge pull request #158 from nf-core/adapterremoval-fix

Fix adapterremoval adapter conflict and add optional adapter list file
This commit is contained in:
James A. Fellows Yates 2022-11-03 11:36:14 +01:00 committed by GitHub
commit 1f31f88635
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 46 additions and 21 deletions

View file

@ -62,7 +62,7 @@ process {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -82,8 +82,8 @@ process {
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
// trimming options
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
// filtering options
"--length_required ${params.shortread_qc_minlength}",
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
@ -100,8 +100,7 @@ process {
withName: ADAPTERREMOVAL_SINGLE {
ext.args = [
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()
@ -119,9 +118,8 @@ process {
// collapsing options
params.shortread_qc_mergepairs ? "--collapse" : "",
// trimming options
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level
params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
// filtering options
"--minlength ${params.shortread_qc_minlength}"
].join(' ').trim()

View file

@ -7,7 +7,7 @@
"nf-core": {
"adapterremoval": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659"
},
"bbmap/bbduk": {
"branch": "master",
@ -59,7 +59,7 @@
},
"fastp": {
"branch": "master",
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
"git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e"
},
"fastqc": {
"branch": "master",

View file

@ -34,7 +34,7 @@ process ADAPTERREMOVAL {
AdapterRemoval \\
--file1 $reads \\
$args \\
$adapterlist \\
$list \\
--basename ${prefix} \\
--threads ${task.cpus} \\
--seed 42 \\
@ -61,7 +61,7 @@ process ADAPTERREMOVAL {
--file1 ${reads[0]} \\
--file2 ${reads[1]} \\
$args \\
$adapterlist \\
$list \\
--basename ${prefix} \\
--threads $task.cpus \\
--seed 42 \\

View file

@ -9,6 +9,7 @@ process FASTP {
input:
tuple val(meta), path(reads)
path adapter_fasta
val save_trimmed_fail
val save_merged
@ -27,6 +28,7 @@ process FASTP {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config.
@ -40,6 +42,7 @@ process FASTP {
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log \\
@ -61,6 +64,7 @@ process FASTP {
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$args \\
2> ${prefix}.fastp.log
@ -82,6 +86,7 @@ process FASTP {
--out2 ${prefix}_2.fastp.fastq.gz \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
$fail_fastq \\
$merge_fastq \\
--thread $task.cpus \\

View file

@ -23,6 +23,10 @@ input:
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively. If you wish to run interleaved paired-end data, supply as single-end data
but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
- adapter_fasta:
type: file
description: File in FASTA format containing possible adapters to remove.
pattern: "*.{fasta,fna,fas,fa}"
- save_trimmed_fail:
type: boolean
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`

View file

@ -59,9 +59,9 @@ params {
// Databases
databases = null
// FASTQ preprocessing
preprocessing_qc_tool = 'fastqc'
// FASTQ preprocessing
perform_shortread_qc = false
shortread_qc_tool = 'fastp'
shortread_qc_skipadaptertrim = false
@ -69,6 +69,7 @@ params {
shortread_qc_excludeunmerged = false
shortread_qc_adapter1 = null
shortread_qc_adapter2 = null
shortread_qc_adapterlist = null
shortread_qc_minlength = 15
perform_longread_qc = false

View file

@ -104,6 +104,12 @@
"description": "Specify adapter 2 nucleotide sequence",
"help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter1`. AdapteRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`"
},
"shortread_qc_adapterlist": {
"type": "string",
"default": "None",
"description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta. (fastp).",
"help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta"
},
"shortread_qc_mergepairs": {
"type": "boolean",
"fa_icon": "fas fa-toggle-on",

View file

@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
take:
reads // [[meta], [reads]]
adapterlist // file
main:
ch_versions = Channel.empty()
@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
paired: !it[0].single_end
}
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist )
/*
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file

View file

@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main'
workflow SHORTREAD_FASTP {
take:
reads // [[meta], [reads]]
adapterlist
main:
ch_versions = Channel.empty()
@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP {
paired: it[0]['single_end'] == false
}
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false )
// Last parameter here turns on merging of PE data
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs )
if ( params.shortread_qc_mergepairs ) {
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged

View file

@ -11,17 +11,18 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
workflow SHORTREAD_PREPROCESSING {
take:
reads // [ [ meta ], [ reads ] ]
adapterlist // file
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
if ( params.shortread_qc_tool == "fastp" ) {
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
} else {

View file

@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log)
// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
params.shortread_hostremoval_index, params.multiqc_config
params.shortread_hostremoval_index, params.multiqc_config,
params.shortread_qc_adapterlist
]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
@ -103,6 +104,12 @@ workflow TAXPROFILER {
ch_versions = Channel.empty()
ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")
adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : []
if ( params.shortread_qc_adapterlist ) {
if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
}
/*
SUBWORKFLOW: Read in samplesheet, validate and stage input files
@ -132,8 +139,9 @@ workflow TAXPROFILER {
/*
SUBWORKFLOW: PERFORM PREPROCESSING
*/
if ( params.perform_shortread_qc ) {
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
} else {
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq