Mirror of https://github.com/MillironX/taxprofiler.git, synced 2024-11-14 06:23:08 +00:00

Commit cf55cc592c (parent 1b893cb039): Get skeleton read processing to input for profiling

13 changed files with 407 additions and 27 deletions
@@ -28,8 +28,47 @@ process {
     withName: FASTQC {
         ext.args = '--quiet'
+        ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
+        publishDir = [
+            path: { "${params.outdir}/fastqc/raw" },
+            mode: 'copy',
+            pattern: '*.html'
+        ]
     }
 
+    withName: FASTP {
+        ext.prefix = { "${meta.id}_${meta.run_accession}" }
+        // TODO also include option to NOT merge
+        ext.args = [
+            { ${meta.single_end} } == 0 ? "-m" : '',
+            params.fastp_exclude_unmerged ? '' : "--include_unmerged"
+        ].join(' ').trim()
+        publishDir = [
+            path: { "${params.outdir}/fastp" },
+            mode: 'copy',
+            pattern: '*.fastq.gz'
+        ]
+    }
+
+    withName: FASTQC_POST {
+        ext.args = '--quiet'
+        ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
+        publishDir = [
+            path: { "${params.outdir}/fastqc/processed" },
+            mode: 'copy',
+            pattern: '*.html'
+        ]
+    }
+
+    withName: CAT_FASTQ {
+        publishDir = [
+            path: { "${params.outdir}/prepared_sequences" },
+            mode: 'copy',
+            pattern: '*.fastq.gz'
+        ]
+    }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
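
A note on the FASTP `ext.args` above: the surrounding options suggest the first entry is meant to pass fastp's `-m` (merge) flag only for paired-end reads, but the closure literal `{ ${meta.single_end} } == 0` never compares equal to 0, so it always yields the empty string. A minimal sketch of the presumed intent, assuming `meta.single_end` is the usual boolean:

    ext.args = [
        meta.single_end ? '' : '-m',                               // assumed intent: merge only paired-end reads
        params.fastp_exclude_unmerged ? '' : '--include_unmerged'
    ].join(' ').trim()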

@@ -22,8 +22,6 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
 
-    // Genome references
-    genome = 'R64-1-1'
 }

@@ -10,10 +10,11 @@ class WorkflowTaxprofiler {
     public static void initialise(params, log) {
         genomeExistsError(params, log)
 
-        if (!params.fasta) {
-            log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
-            System.exit(1)
-        }
+        // TODO update as necessary
+        //if (!params.fasta) {
+        //    log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
+        //    System.exit(1)
+        //}
     }
 
     //

@@ -3,9 +3,15 @@
     "homePage": "https://github.com/nf-core/taxprofiler",
     "repos": {
         "nf-core/modules": {
+            "cat/fastq": {
+                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            },
             "custom/dumpsoftwareversions": {
                 "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
            },
+            "fastp": {
+                "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
+            },
             "fastqc": {
                 "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961"
             },
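
These pins are normally written by nf-core/tools rather than edited by hand; an illustrative way to pull in the two new modules (assuming a recent nf-core/tools is installed):

    nf-core modules install fastp
    nf-core modules install cat/fastq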

modules/nf-core/modules/cat/fastq/main.nf (new file, generated, 51 lines)

@@ -0,0 +1,51 @@
+process CAT_FASTQ {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
+        'biocontainers/biocontainers:v1.2.0_cv1' }"
+
+    input:
+    tuple val(meta), path(reads, stageAs: "input*/*")
+
+    output:
+    tuple val(meta), path("*.merged.fastq.gz"), emit: reads
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads.collect{ it.toString() }
+    if (meta.single_end) {
+        if (readList.size > 1) {
+            """
+            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size > 2) {
+            def read1 = []
+            def read2 = []
+            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
+            """
+            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
+            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+}
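
For reference, the `eachWithIndex` line above deinterleaves an alternating R1/R2 file list by index parity. A standalone Groovy sketch with hypothetical file names:

    def readList = ['s1_run1_R1.fq.gz', 's1_run1_R2.fq.gz', 's1_run2_R1.fq.gz', 's1_run2_R2.fq.gz']
    def read1 = []
    def read2 = []
    // even indices (0, 2, ...) go to read1; odd indices (1, 3, ...) go to read2
    readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
    assert read1 == ['s1_run1_R1.fq.gz', 's1_run2_R1.fq.gz']
    assert read2 == ['s1_run1_R2.fq.gz', 's1_run2_R2.fq.gz']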

modules/nf-core/modules/cat/fastq/meta.yml (new file, generated, 39 lines)

@@ -0,0 +1,39 @@
+name: cat_fastq
+description: Concatenates fastq files
+keywords:
+  - fastq
+  - concatenate
+tools:
+  - cat:
+      description: |
+        The cat utility reads files sequentially, writing them to the standard output.
+      documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: list
+      description: |
+        List of input FastQ files to be concatenated.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: Merged fastq file
+      pattern: "*.{merged.fastq.gz}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"

modules/nf-core/modules/fastp/main.nf (new file, generated, 75 lines)

@@ -0,0 +1,75 @@
+process FASTP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' :
+        'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    val   save_trimmed_fail
+    val   save_merged
+
+    output:
+    tuple val(meta), path('*.trim.fastq.gz')  , optional:true, emit: reads
+    tuple val(meta), path('*.json')           , emit: json
+    tuple val(meta), path('*.html')           , emit: html
+    tuple val(meta), path('*.log')            , emit: log
+    path "versions.yml"                       , emit: versions
+    tuple val(meta), path('*.fail.fastq.gz')  , optional:true, emit: reads_fail
+    tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+        fastp \\
+            --in1 ${prefix}.fastq.gz \\
+            --out1 ${prefix}.trim.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $fail_fastq \\
+            $args \\
+            2> ${prefix}.fastp.log
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else {
+        def fail_fastq  = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+        fastp \\
+            --in1 ${prefix}_1.fastq.gz \\
+            --in2 ${prefix}_2.fastq.gz \\
+            --out1 ${prefix}_1.trim.fastq.gz \\
+            --out2 ${prefix}_2.trim.fastq.gz \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $fail_fastq \\
+            $merge_fastq \\
+            --thread $task.cpus \\
+            --detect_adapter_for_pe \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    }
+}
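
The two trailing `val` inputs toggle the optional outputs. As wired up later in this same commit, single-end reads are trimmed without merging, while paired-end reads are also merged:

    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )  // save_trimmed_fail=false, save_merged=false
    FASTP_PAIRED ( ch_input_for_fastp.paired, false, true  )  // save_merged=true adds '-m --merged_out ...'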

modules/nf-core/modules/fastp/meta.yml (new file, generated, 68 lines)

@@ -0,0 +1,68 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+  - trimming
+  - quality control
+  - fastq
+tools:
+  - fastp:
+      description: |
+        A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+      documentation: https://github.com/OpenGene/fastp
+      doi: https://doi.org/10.1093/bioinformatics/bty560
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - save_trimmed_fail:
+      type: boolean
+      description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+  - save_merged:
+      type: boolean
+      description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: The trimmed/modified/unmerged fastq reads
+      pattern: "*trim.fastq.gz"
+  - json:
+      type: file
+      description: Results in JSON format
+      pattern: "*.json"
+  - html:
+      type: file
+      description: Results in HTML format
+      pattern: "*.html"
+  - log:
+      type: file
+      description: fastp log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing
+      pattern: "*fail.fastq.gz"
+  - reads_merged:
+      type: file
+      description: Reads that were successfully merged
+      pattern: "*.{merged.fastq.gz}"
+authors:
+  - "@drpatelh"
+  - "@kevinmenden"

@@ -33,7 +33,7 @@ params {
     help = false
     validate_params = true
     show_hidden_params = false
-    schema_ignore_params = 'genomes'
+    schema_ignore_params = 'genomes,fasta'
     enable_conda = false
 
     // Config options

@@ -50,6 +50,9 @@ params {
     max_cpus = 16
     max_time = '240.h'
 
+    // FASTQ preprocessing
+    fastp_clip_merge = false
+    fastp_exclude_unmerged = true
 }
 
 // Load base.config by default for all pipelines
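
With these parameters in place, the new preprocessing block is opt-in from the command line; an illustrative invocation (the profile names are placeholders):

    nextflow run MillironX/taxprofiler -profile test,docker --fastp_clip_merge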

@@ -56,15 +56,6 @@
             "fa_icon": "fas fa-book",
             "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
         },
-        "fasta": {
-            "type": "string",
-            "format": "file-path",
-            "mimetype": "text/plain",
-            "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
-            "description": "Path to FASTA genome file.",
-            "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-            "fa_icon": "far fa-file-code"
-        },
         "igenomes_base": {
             "type": "string",
             "format": "directory-path",

@@ -9,14 +9,28 @@ workflow INPUT_CHECK {
     samplesheet // file: /path/to/samplesheet.csv
 
     main:
-    SAMPLESHEET_CHECK ( samplesheet )
+    parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
         .csv
         .splitCsv ( header:true, sep:',' )
+        .dump(tag: "split_csv_out")
+        .branch {
+            fasta: it['fasta'] != ''
+            fastq: true
+        }
+
+    parsed_samplesheet.fastq
         .map { create_fastq_channels(it) }
-        .set { reads }
+        .dump(tag: "fastq_channel_init")
+        .set { fastq }
+
+    parsed_samplesheet.fasta
+        .map { create_fasta_channels(it) }
+        .dump(tag: "fasta_channel_init")
+        .set { fasta }
 
     emit:
-    reads                                     // channel: [ val(meta), [ reads ] ]
+    fastq                                     // channel: [ val(meta), [ reads ] ]
+    fasta                                     // channel: [ val(meta), fasta ]
     versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
 }
 
@@ -24,6 +38,8 @@ workflow INPUT_CHECK {
 def create_fastq_channels(LinkedHashMap row) {
     def meta = [:]
     meta.id = row.sample
+    meta.run_accession = row.run_accession
+    meta.instrument_platform = row.instrument_platform
     meta.single_end = row.single_end.toBoolean()
 
     def array = []
@@ -40,3 +56,20 @@ def create_fastq_channels(LinkedHashMap row) {
     }
     return array
 }
+
+// Function to get list of [ meta, fasta ]
+def create_fasta_channels(LinkedHashMap row) {
+    def meta = [:]
+    meta.id = row.sample
+    meta.run_accession = row.run_accession
+    meta.instrument_platform = row.instrument_platform
+    meta.single_end = true
+
+    def array = []
+    if (!file(row.fasta).exists()) {
+        exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
+    }
+    array = [ meta, [ file(row.fasta) ] ]
+
+    return array
+}
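
The branch above sends any row with a non-empty `fasta` column down the FASTA path and everything else down the FASTQ path. A hypothetical samplesheet sketch (the exact column set is enforced by the samplesheet check script, which is not part of this diff):

    sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
    sample1,run1,ILLUMINA,s1_run1_R1.fastq.gz,s1_run1_R2.fastq.gz,
    sample1,run2,ILLUMINA,s1_run2_R1.fastq.gz,s1_run2_R2.fastq.gz,
    sample2,run1,OXFORD_NANOPORE,s2_run1.fastq.gz,,
    sample3,run1,ILLUMINA,,,s3_assembly.fa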

@@ -11,7 +11,7 @@ WorkflowTaxprofiler.initialise(params, log)
 
 // TODO nf-core: Add all file path parameters for the pipeline to the list below
 // Check input path parameters to see if they exist
-def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ]
+def checkPathParamList = [ params.input, params.multiqc_config ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
 // Check mandatory parameters

@@ -50,6 +50,11 @@ include { FASTQC } from '../modules/nf-core/modules/fastqc/
 include { MULTIQC                     } from '../modules/nf-core/modules/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
 
+include { FASTP as FASTP_SINGLE       } from '../modules/nf-core/modules/fastp/main'
+include { FASTP as FASTP_PAIRED       } from '../modules/nf-core/modules/fastp/main'
+include { FASTQC as FASTQC_POST       } from '../modules/nf-core/modules/fastqc/main'
+include { CAT_FASTQ                   } from '../modules/nf-core/modules/cat/fastq/main'
+
 /*
 ========================================================================================
     RUN MAIN WORKFLOW

@@ -75,7 +80,7 @@ workflow TAXPROFILER {
     // MODULE: Run FastQC
     //
     FASTQC (
-        INPUT_CHECK.out.reads
+        INPUT_CHECK.out.fastq
     )
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
@@ -83,6 +88,71 @@ workflow TAXPROFILER {
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )
 
+    //
+    // MODULE: Run Clip/Merge/Complexity
+    //
+    // TODO give option to clip only and retain pairs
+    // TODO give option to retain singletons (probably a fastp option)
+    // TODO move to subworkflow
+    if ( params.fastp_clip_merge ) {
+
+        ch_input_for_fastp = INPUT_CHECK.out.fastq
+            .dump(tag: "pre-fastp_branch")
+            .branch{
+                single: it[0]['single_end'] == true
+                paired: it[0]['single_end'] == false
+            }
+
+        ch_input_for_fastp.single.dump(tag: "input_fastp_single")
+        ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
+
+        FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
+        FASTP_PAIRED ( ch_input_for_fastp.paired, false, true )
+
+        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
+            .mix( FASTP_SINGLE.out.reads )
+            .map {
+                meta, reads ->
+                    def meta_new = meta.clone()
+                    meta_new['single_end'] = 1
+                    [ meta_new, reads ]
+            }
+
+        FASTQC_POST ( ch_fastp_reads_prepped )
+
+        ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
+        ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
+
+        ch_processed_reads = ch_fastp_reads_prepped
+
+    } else {
+        ch_processed_reads = INPUT_CHECK.out.fastq
+    }
+
+    // MODULE: Cat merge runs of same sample
+    ch_processed_for_combine = ch_processed_reads
+        .dump(tag: "prep_for_combine_grouping")
+        .map {
+            meta, reads ->
+                def meta_new = meta.clone()
+                meta_new['run_accession'] = 'combined'
+                [ meta_new, reads ]
+        }
+        .groupTuple ( by: 0 )
+        .branch{
+            combine: it[1].size() >= 2
+            skip: it[1].size() < 2
+        }
+
+    CAT_FASTQ ( ch_processed_for_combine.combine )
+
+    // Ready for profiling!
+    ch_reads_for_profiling = ch_processed_for_combine.skip
+        .dump(tag: "skip_combine")
+        .mix( CAT_FASTQ.out.reads )
+        .dump(tag: "files_for_profiling")
+
     //
     // MODULE: MultiQC
     //
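
To make the run-combining step concrete: rewriting `run_accession` to 'combined' before `groupTuple(by: 0)` makes all runs of a sample share one grouping key, so only multi-run samples reach CAT_FASTQ. A sketch of the channel contents with hypothetical samples:

    // before groupTuple:
    //   [ [id:'sample1', run_accession:'combined', single_end:1], reads_run1 ]
    //   [ [id:'sample1', run_accession:'combined', single_end:1], reads_run2 ]
    //   [ [id:'sample2', run_accession:'combined', single_end:1], reads_run1 ]
    // after groupTuple(by: 0) and branch:
    //   combine: [ [id:'sample1', ...], [reads_run1, reads_run2] ]  -> CAT_FASTQ
    //   skip:    [ [id:'sample2', ...], [reads_run1] ]              -> passed straight to profiling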

@@ -95,6 +165,12 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    if (params.fastp_clip_merge) {
+        ch_multiqc_files = ch_multiqc_files.mix(FASTP_SINGLE.out.json.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTP_PAIRED.out.json.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_POST.out.zip.collect{it[1]}.ifEmpty([]))
+    }
+
     MULTIQC (
         ch_multiqc_files.collect()