mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 04:09:54 +00:00
Merge pull request #158 from nf-core/adapterremoval-fix
Fix adapterremoval adapter conflict and add optional adapter list file
This commit is contained in:
commit
1f31f88635
11 changed files with 46 additions and 21 deletions
|
@ -62,7 +62,7 @@ process {
|
|||
ext.args = [
|
||||
// trimming options
|
||||
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_qc_minlength}",
|
||||
(params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||
|
@ -82,8 +82,8 @@ process {
|
|||
params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
|
||||
// trimming options
|
||||
params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
|
||||
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
|
||||
// filtering options
|
||||
"--length_required ${params.shortread_qc_minlength}",
|
||||
params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
|
||||
|
@ -100,8 +100,7 @@ process {
|
|||
withName: ADAPTERREMOVAL_SINGLE {
|
||||
ext.args = [
|
||||
// trimming options
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||
// filtering options
|
||||
"--minlength ${params.shortread_qc_minlength}"
|
||||
].join(' ').trim()
|
||||
|
@ -119,9 +118,8 @@ process {
|
|||
// collapsing options
|
||||
params.shortread_qc_mergepairs ? "--collapse" : "",
|
||||
// trimming options
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
|
||||
params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
|
||||
params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level
|
||||
params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
|
||||
// filtering options
|
||||
"--minlength ${params.shortread_qc_minlength}"
|
||||
].join(' ').trim()
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
"nf-core": {
|
||||
"adapterremoval": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
"git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659"
|
||||
},
|
||||
"bbmap/bbduk": {
|
||||
"branch": "master",
|
||||
|
@ -59,7 +59,7 @@
|
|||
},
|
||||
"fastp": {
|
||||
"branch": "master",
|
||||
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
|
||||
"git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e"
|
||||
},
|
||||
"fastqc": {
|
||||
"branch": "master",
|
||||
|
|
4
modules/nf-core/adapterremoval/main.nf
generated
4
modules/nf-core/adapterremoval/main.nf
generated
|
@ -34,7 +34,7 @@ process ADAPTERREMOVAL {
|
|||
AdapterRemoval \\
|
||||
--file1 $reads \\
|
||||
$args \\
|
||||
$adapterlist \\
|
||||
$list \\
|
||||
--basename ${prefix} \\
|
||||
--threads ${task.cpus} \\
|
||||
--seed 42 \\
|
||||
|
@ -61,7 +61,7 @@ process ADAPTERREMOVAL {
|
|||
--file1 ${reads[0]} \\
|
||||
--file2 ${reads[1]} \\
|
||||
$args \\
|
||||
$adapterlist \\
|
||||
$list \\
|
||||
--basename ${prefix} \\
|
||||
--threads $task.cpus \\
|
||||
--seed 42 \\
|
||||
|
|
5
modules/nf-core/fastp/main.nf
generated
5
modules/nf-core/fastp/main.nf
generated
|
@ -9,6 +9,7 @@ process FASTP {
|
|||
|
||||
input:
|
||||
tuple val(meta), path(reads)
|
||||
path adapter_fasta
|
||||
val save_trimmed_fail
|
||||
val save_merged
|
||||
|
||||
|
@ -27,6 +28,7 @@ process FASTP {
|
|||
script:
|
||||
def args = task.ext.args ?: ''
|
||||
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
|
||||
def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
|
||||
// Added soft-links to original fastqs for consistent naming in MultiQC
|
||||
// Use single ended for interleaved. Add --interleaved_in in config.
|
||||
|
@ -40,6 +42,7 @@ process FASTP {
|
|||
--thread $task.cpus \\
|
||||
--json ${prefix}.fastp.json \\
|
||||
--html ${prefix}.fastp.html \\
|
||||
$adapter_list \\
|
||||
$fail_fastq \\
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log \\
|
||||
|
@ -61,6 +64,7 @@ process FASTP {
|
|||
--thread $task.cpus \\
|
||||
--json ${prefix}.fastp.json \\
|
||||
--html ${prefix}.fastp.html \\
|
||||
$adapter_list \\
|
||||
$fail_fastq \\
|
||||
$args \\
|
||||
2> ${prefix}.fastp.log
|
||||
|
@ -82,6 +86,7 @@ process FASTP {
|
|||
--out2 ${prefix}_2.fastp.fastq.gz \\
|
||||
--json ${prefix}.fastp.json \\
|
||||
--html ${prefix}.fastp.html \\
|
||||
$adapter_list \\
|
||||
$fail_fastq \\
|
||||
$merge_fastq \\
|
||||
--thread $task.cpus \\
|
||||
|
|
4
modules/nf-core/fastp/meta.yml
generated
4
modules/nf-core/fastp/meta.yml
generated
|
@ -23,6 +23,10 @@ input:
|
|||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively. If you wish to run interleaved paired-end data, supply as single-end data
|
||||
but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
|
||||
- adapter_fasta:
|
||||
type: file
|
||||
description: File in FASTA format containing possible adapters to remove.
|
||||
pattern: "*.{fasta,fna,fas,fa}"
|
||||
- save_trimmed_fail:
|
||||
type: boolean
|
||||
description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
|
||||
|
|
|
@ -59,9 +59,9 @@ params {
|
|||
// Databases
|
||||
databases = null
|
||||
|
||||
// FASTQ preprocessing
|
||||
preprocessing_qc_tool = 'fastqc'
|
||||
|
||||
// FASTQ preprocessing
|
||||
perform_shortread_qc = false
|
||||
shortread_qc_tool = 'fastp'
|
||||
shortread_qc_skipadaptertrim = false
|
||||
|
@ -69,6 +69,7 @@ params {
|
|||
shortread_qc_excludeunmerged = false
|
||||
shortread_qc_adapter1 = null
|
||||
shortread_qc_adapter2 = null
|
||||
shortread_qc_adapterlist = null
|
||||
shortread_qc_minlength = 15
|
||||
|
||||
perform_longread_qc = false
|
||||
|
|
|
@ -104,6 +104,12 @@
|
|||
"description": "Specify adapter 2 nucleotide sequence",
|
||||
"help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \n\nIf not set, the selected short-read QC tool's defaults will be used.\n\n> Modifies tool parameter(s):\n> - fastp: `--adapter_sequence_r2`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\n> - AdapterRemoval: `--adapter2`. AdapterRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`"
|
||||
},
|
||||
"shortread_qc_adapterlist": {
|
||||
"type": "string",
|
||||
"default": "None",
|
||||
"description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta (fastp).",
|
||||
"help_text": "Allows you to supply a file with a list of adapters (or adapter combinations) to remove from all files. \n\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters. \n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: the first column represents the forward strand, the second column the reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. Each adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\n\n> Modifies AdapterRemoval parameter: --adapter-list\n> Modifies fastp parameter: --adapter_fasta"
|
||||
},
|
||||
"shortread_qc_mergepairs": {
|
||||
"type": "boolean",
|
||||
"fa_icon": "fas fa-toggle-on",
|
||||
|
|
|
@ -10,6 +10,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
|||
|
||||
take:
|
||||
reads // [[meta], [reads]]
|
||||
adapterlist // file
|
||||
|
||||
main:
|
||||
ch_versions = Channel.empty()
|
||||
|
@ -21,8 +22,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
|
|||
paired: !it[0].single_end
|
||||
}
|
||||
|
||||
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
|
||||
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
|
||||
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, adapterlist )
|
||||
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, adapterlist )
|
||||
|
||||
/*
|
||||
* Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file
|
||||
|
|
|
@ -8,6 +8,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/fastp/main'
|
|||
workflow SHORTREAD_FASTP {
|
||||
take:
|
||||
reads // [[meta], [reads]]
|
||||
adapterlist
|
||||
|
||||
main:
|
||||
ch_versions = Channel.empty()
|
||||
|
@ -19,9 +20,9 @@ workflow SHORTREAD_FASTP {
|
|||
paired: it[0]['single_end'] == false
|
||||
}
|
||||
|
||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||
FASTP_SINGLE ( ch_input_for_fastp.single, adapterlist, false, false )
|
||||
// Last parameter here turns on merging of PE data
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, adapterlist, false, params.shortread_qc_mergepairs )
|
||||
|
||||
if ( params.shortread_qc_mergepairs ) {
|
||||
ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
|
||||
|
|
|
@ -11,17 +11,18 @@ include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
|
|||
workflow SHORTREAD_PREPROCESSING {
|
||||
take:
|
||||
reads // [ [ meta ], [ reads ] ]
|
||||
adapterlist // file
|
||||
|
||||
main:
|
||||
ch_versions = Channel.empty()
|
||||
ch_multiqc_files = Channel.empty()
|
||||
|
||||
if ( params.shortread_qc_tool == "fastp" ) {
|
||||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||
ch_processed_reads = SHORTREAD_FASTP ( reads, adapterlist ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
|
||||
} else if ( params.shortread_qc_tool == "adapterremoval" ) {
|
||||
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
|
||||
ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads, adapterlist ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
|
||||
} else {
|
||||
|
|
|
@ -12,7 +12,8 @@ WorkflowTaxprofiler.initialise(params, log)
|
|||
// TODO nf-core: Add all file path parameters for the pipeline to the list below
|
||||
// Check input path parameters to see if they exist
|
||||
def checkPathParamList = [ params.input, params.databases, params.hostremoval_reference,
|
||||
params.shortread_hostremoval_index, params.multiqc_config
|
||||
params.shortread_hostremoval_index, params.multiqc_config,
|
||||
params.shortread_qc_adapterlist
|
||||
]
|
||||
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
|
||||
|
||||
|
@ -103,6 +104,12 @@ workflow TAXPROFILER {
|
|||
|
||||
ch_versions = Channel.empty()
|
||||
ch_multiqc_logo= Channel.fromPath("$projectDir/docs/images/nf-core-taxprofiler_logo_custom_light.png")
|
||||
adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : []
|
||||
|
||||
if ( params.shortread_qc_adapterlist ) {
|
||||
if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/taxprofiler] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
|
||||
if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/taxprofiler] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}"
|
||||
}
|
||||
|
||||
/*
|
||||
SUBWORKFLOW: Read in samplesheet, validate and stage input files
|
||||
|
@ -132,8 +139,9 @@ workflow TAXPROFILER {
|
|||
/*
|
||||
SUBWORKFLOW: PERFORM PREPROCESSING
|
||||
*/
|
||||
|
||||
if ( params.perform_shortread_qc ) {
|
||||
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
|
||||
ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq, adapterlist ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
|
||||
} else {
|
||||
ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
|
||||
|
|
Loading…
Reference in a new issue