mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 03:59:55 +00:00
Merge branch 'dev' into database-untar
This commit is contained in:
commit
3079f16861
13 changed files with 191 additions and 28 deletions
|
@ -15,6 +15,8 @@
|
|||
* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
|
||||
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
|
||||
|
||||
* [Porechop](https://github.com/rrwick/Porechop)
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
* [Anaconda](https://anaconda.com)
|
||||
|
|
|
@ -30,7 +30,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
|
|||
|
||||
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
|
||||
2. Performs optional read pre-processing
|
||||
- Adapter clipping and merging
|
||||
- Adapter clipping and merging (short and nanopore reads)
|
||||
- Low complexity filtering
|
||||
- Host read removal
|
||||
- Run merging
|
||||
|
|
|
@ -173,7 +173,7 @@ def check_samplesheet(file_in, file_out):
|
|||
## Auto-detect paired-end/single-end
|
||||
if sample and fastq_1 and fastq_2: ## Paired-end short reads
|
||||
sample_info.extend(["0", fastq_1, fastq_2, fasta])
|
||||
elif sample and fastq_1 and not fastq_2: ## Single-end short reads
|
||||
elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads
|
||||
sample_info.extend(["1", fastq_1, fastq_2, fasta])
|
||||
elif (
|
||||
sample and fasta and not fastq_1 and not fastq_2
|
||||
|
|
|
@ -41,7 +41,7 @@ process {
|
|||
// TODO also include option to NOT merge
|
||||
ext.args = [
|
||||
{ ${meta.single_end} } == 0 ? "-m" : '',
|
||||
params.fastp_exclude_unmerged ? '' : "--include_unmerged"
|
||||
params.shortread_excludeunmerged ? '' : "--include_unmerged"
|
||||
].join(' ').trim()
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/fastp" },
|
||||
|
@ -50,6 +50,15 @@ process {
|
|||
]
|
||||
}
|
||||
|
||||
withName: PORECHOP {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/porechop" },
|
||||
mode: 'copy',
|
||||
pattern: '*.fastq.gz'
|
||||
]
|
||||
}
|
||||
|
||||
withName: FASTQC_POST {
|
||||
ext.args = '--quiet'
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
|
||||
|
@ -75,7 +84,7 @@ process {
|
|||
pattern: '*.{rma6,tab,text,sam,log}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.when = params.run_malt
|
||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: KRAKEN2_KRAKEN2 {
|
||||
|
@ -85,7 +94,6 @@ process {
|
|||
pattern: '*.{fastq.gz,txt}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.when = params.run_kraken2
|
||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
|
|
|
@ -44,11 +44,11 @@ TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
|
|||
TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
|
||||
```
|
||||
|
||||
| Column | Description |
|
||||
|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
|
||||
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
|
||||
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
|
||||
| Column | Description |
|
||||
| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
|
||||
| `fastq_1` | Full path to FastQ file for Illumina short reads 1 or Nanopore reads. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
|
||||
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
|
||||
|
||||
An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
},
|
||||
"untar": {
|
||||
"git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
|
||||
"porechop": {
|
||||
"git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
35
modules/nf-core/modules/porechop/main.nf
generated
Normal file
35
modules/nf-core/modules/porechop/main.nf
generated
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Runs Porechop (v0.2.4) on a set of reads and emits the resulting
// gzip-compressed FastQ together with a versions.yml recording the tool version.
process PORECHOP {
    tag "$meta.id"
    label 'process_medium'

    // Software environment: conda package or the matching biocontainer image
    conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' :
        'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output basename can be overridden
    // through ext.args / ext.prefix; the prefix falls back to the sample id.
    def porechop_args = task.ext.args   ? task.ext.args   : ''
    def out_prefix    = task.ext.prefix ? task.ext.prefix : "${meta.id}"
    """
    porechop \\
        -i ${reads} \\
        -t ${task.cpus} \\
        ${porechop_args} \\
        -o ${out_prefix}.fastq.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        porechop: \$( porechop --version )
    END_VERSIONS
    """
}
|
50
modules/nf-core/modules/porechop/meta.yml
generated
Normal file
50
modules/nf-core/modules/porechop/meta.yml
generated
Normal file
|
@ -0,0 +1,50 @@
|
|||
name: porechop
|
||||
description: Adapter removal and demultiplexing of Oxford Nanopore reads
|
||||
keywords:
|
||||
- adapter
|
||||
- nanopore
|
||||
- demultiplexing
|
||||
tools:
|
||||
- porechop:
|
||||
description: Adapter removal and demultiplexing of Oxford Nanopore reads
|
||||
homepage: "https://github.com/rrwick/Porechop"
|
||||
documentation: "https://github.com/rrwick/Porechop"
|
||||
tool_dev_url: "https://github.com/rrwick/Porechop"
|
||||
doi: "10.1099/mgen.0.000132"
|
||||
licence: ["GPL v3"]
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: fastq/fastq.gz file
|
||||
pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- reads:
|
||||
type: file
|
||||
description: Demultiplexed and/or adapter-trimmed fastq.gz file
|
||||
pattern: "*.{fastq.gz}"
|
||||
|
||||
authors:
|
||||
- "@ggabernet"
|
||||
- "@jasmezz"
|
||||
- "@d4straub"
|
||||
- "@LaurenceKuhl"
|
||||
- "@SusiJo"
|
||||
- "@jonasscheid"
|
||||
- "@jonoave"
|
||||
- "@GokceOGUZ"
|
|
@ -55,8 +55,9 @@ params {
|
|||
databases = null
|
||||
|
||||
// FASTQ preprocessing
|
||||
fastp_clip_merge = false
|
||||
fastp_exclude_unmerged = true
|
||||
shortread_clipmerge = false
|
||||
shortread_excludeunmerged = true
|
||||
longread_clip = false
|
||||
|
||||
// MALT
|
||||
run_malt = false
|
||||
|
|
|
@ -15,14 +15,20 @@ workflow INPUT_CHECK {
|
|||
.dump(tag: "input_split_csv_out")
|
||||
.branch {
|
||||
fasta: it['fasta'] != ''
|
||||
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
|
||||
fastq: true
|
||||
}
|
||||
|
||||
parsed_samplesheet.fastq
|
||||
.map { create_fastq_channels(it) }
|
||||
.map { create_fastq_channel(it) }
|
||||
.dump(tag: "fastq_channel_init")
|
||||
.set { fastq }
|
||||
|
||||
parsed_samplesheet.nanopore
|
||||
.map { create_fastq_channel(it) }
|
||||
.dump(tag: "fastq_nanopore_channel_init")
|
||||
.set { nanopore }
|
||||
|
||||
parsed_samplesheet.fasta
|
||||
.map { create_fasta_channels(it) }
|
||||
.dump(tag: "fasta_channel_init")
|
||||
|
@ -30,6 +36,7 @@ workflow INPUT_CHECK {
|
|||
|
||||
emit:
|
||||
fastq // channel: [ val(meta), [ reads ] ]
|
||||
nanopore // channel: [ val(meta), [ reads ] ]
|
||||
fasta // channel: [ val(meta), fasta ]
|
||||
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
|
||||
}
|
||||
|
@ -51,10 +58,17 @@ def create_fastq_channels(LinkedHashMap row) {
|
|||
if (meta.single_end) {
|
||||
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
|
||||
} else {
|
||||
if (!file(row.fastq_2).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||
if (meta.instrument_platform == 'OXFORD_NANOPORE') {
|
||||
if (row.fastq_2 != '') {
|
||||
exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1) ] ]
|
||||
} else {
|
||||
if (!file(row.fastq_2).exists()) {
|
||||
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
}
|
||||
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
|
||||
}
|
||||
return fastq_meta
|
||||
}
|
||||
|
|
34
subworkflows/local/longread_preprocessing.nf
Normal file
34
subworkflows/local/longread_preprocessing.nf
Normal file
|
@ -0,0 +1,34 @@
|
|||
|
||||
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
|
||||
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
|
||||
|
||||
//
// Long-read preprocessing: run PORECHOP on the incoming reads, then run
// FastQC on the processed reads.
//
// take:
//   reads    - channel of [ meta, reads ] tuples (as consumed by PORECHOP)
// emit:
//   reads    - PORECHOP output reads with meta.single_end forced to 1
//   versions - versions.yml files of the tools run in this subworkflow
//   mqc      - FastQC zip reports of the processed reads, for MultiQC
//
workflow LONGREAD_PREPROCESSING {
    take:
    reads

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    PORECHOP ( reads )

    // Work on a copy of the meta map so the map objects shared with other
    // channels are not mutated; PORECHOP emits one file per sample, so the
    // output is flagged as single-end.
    ch_processed_reads = PORECHOP.out.reads
        .dump(tag: "pre_fastqc_check")
        .map {
            meta, reads ->
                def meta_new = meta.clone()
                meta_new['single_end'] = 1
                [ meta_new, reads ]
        }

    // QC exactly the channel that is emitted downstream, so FastQC sees the
    // same (single-end) meta as every later step.
    FASTQC_POST ( ch_processed_reads )

    // Record the versions of every tool run here, not only PORECHOP.
    ch_versions      = ch_versions.mix( PORECHOP.out.versions.first() )
    ch_versions      = ch_versions.mix( FASTQC_POST.out.versions.first() )
    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )

    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
    versions = ch_versions          // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}
|
||||
|
|
@ -7,7 +7,7 @@ include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fast
|
|||
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
|
||||
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
|
||||
|
||||
workflow FASTQ_PREPROCESSING {
|
||||
workflow SHORTREAD_PREPROCESSING {
|
||||
take:
|
||||
reads // file: /path/to/samplesheet.csv
|
||||
|
||||
|
@ -23,7 +23,7 @@ workflow FASTQ_PREPROCESSING {
|
|||
// TODO move to subworkflow
|
||||
|
||||
|
||||
if ( params.fastp_clip_merge ) {
|
||||
if ( params.shortread_clipmerge ) {
|
||||
|
||||
ch_input_for_fastp = reads
|
||||
.dump(tag: "pre-fastp_branch")
|
|
@ -39,8 +39,8 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
|
|||
include { INPUT_CHECK } from '../subworkflows/local/input_check'
|
||||
|
||||
include { DB_CHECK } from '../subworkflows/local/db_check'
|
||||
include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing'
|
||||
|
||||
include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
|
||||
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing'
|
||||
|
||||
/*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -89,7 +89,7 @@ workflow TAXPROFILER {
|
|||
// MODULE: Run FastQC
|
||||
//
|
||||
FASTQC (
|
||||
INPUT_CHECK.out.fastq
|
||||
INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
|
||||
)
|
||||
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
|
||||
|
||||
|
@ -100,14 +100,22 @@ workflow TAXPROFILER {
|
|||
//
|
||||
// PERFORM PREPROCESSING
|
||||
//
|
||||
if ( params.fastp_clip_merge ) {
|
||||
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq )
|
||||
if ( params.shortread_clipmerge ) {
|
||||
SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq )
|
||||
}
|
||||
|
||||
if ( params.longread_clip ) {
|
||||
ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
|
||||
.map { it -> [ it[0], [it[1]] ] }
|
||||
ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
|
||||
} else {
|
||||
ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
|
||||
}
|
||||
|
||||
//
|
||||
// PERFORM RUN MERGING
|
||||
//
|
||||
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads
|
||||
ch_processed_for_combine = SHORTREAD_PREPROCESSING.out.reads
|
||||
.dump(tag: "prep_for_combine_grouping")
|
||||
.map {
|
||||
meta, reads ->
|
||||
|
@ -134,6 +142,7 @@ workflow TAXPROFILER {
|
|||
|
||||
// output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
|
||||
ch_input_for_profiling = ch_reads_for_profiling
|
||||
.mix( ch_longreads_preprocessed )
|
||||
.combine(DB_CHECK.out.dbs)
|
||||
.dump(tag: "reads_plus_db")
|
||||
.branch {
|
||||
|
@ -175,9 +184,13 @@ workflow TAXPROFILER {
|
|||
//
|
||||
// RUN PROFILING
|
||||
//
|
||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
if ( params.run_malt ) {
|
||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||
}
|
||||
|
||||
if ( params.run_kraken2 ) {
|
||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
}
|
||||
|
||||
//
|
||||
// MODULE: MultiQC
|
||||
|
@ -191,8 +204,12 @@ workflow TAXPROFILER {
|
|||
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
|
||||
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
|
||||
if (params.fastp_clip_merge) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc)
|
||||
|
||||
if (params.shortread_clipmerge) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
|
||||
}
|
||||
if (params.longread_clip) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
|
||||
}
|
||||
if (params.run_kraken2) {
|
||||
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
|
||||
|
|
Loading…
Reference in a new issue