initial attempt for multiqc module

This commit is contained in:
Abhinav Sharma 2020-11-23 18:31:55 +05:30
parent e02f0de514
commit 2783913122
16 changed files with 10574 additions and 0 deletions

View file

@ -0,0 +1,59 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the software tool name from a process name such as $task.process.
 *
 * The last ':'-separated component of the process name is taken, and the text
 * before its first '_' is returned in lower case,
 * e.g. 'WORKFLOW:FASTQC_SE' -> 'fastqc'.
 */
def getSoftwareName(task_process) {
    // Split on ':' and keep only the final component
    def scopes = task_process.tokenize(':')
    def leaf   = scopes[-1]
    // Anything after the first underscore is dropped
    def words  = leaf.tokenize('_')
    return words[0].toLowerCase()
}
/*
 * Initialise default values and return a Groovy Map of the options available to nf-core modules.
 *
 * args: Map of user-supplied options; may be null or empty, in which case every
 *       option takes its default.
 *
 * Returned keys:
 *   args, args2, publish_dir, suffix - default to ''
 *   publish_by_id                    - defaults to false
 *   publish_files                    - passed through unchanged (null when not supplied)
 */
def initOptions(Map args) {
    // Treat a missing options map like an empty one instead of failing on null
    def given = args ?: [:]
    def options = [:]
    options.args          = given.args ?: ''
    options.args2         = given.args2 ?: ''
    options.publish_by_id = given.publish_by_id ?: false
    options.publish_dir   = given.publish_dir ?: ''
    // Deliberately no default: saveFiles distinguishes null, a Map and any other value
    options.publish_files = given.publish_files
    options.suffix        = given.suffix ?: ''
    return options
}
/*
 * Tidy up and join the elements of a list to return a path string.
 *
 * Each entry is converted to a string, trimmed of surrounding whitespace and
 * stripped of leading and trailing slashes. Null entries, and entries that are
 * empty after tidying, are skipped. The remaining entries are joined with '/'.
 * An empty list yields ''.
 */
def getPathFromList(path_list) {
    // Drop nulls first: calling trim() on a null entry would throw
    def present = path_list.findAll { item -> item != null }
    // Trim whitespace, then strip leading and trailing slashes
    def cleaned = present.collect { it.toString().trim().replaceAll("^[/]+|[/]+\$", "") }
    // Filter after tidying so that entries such as ' ' or '/' cannot leave a '//' in the result
    def parts = cleaned.findAll { part -> !part.isEmpty() }
    return parts.join('/')
}
/*
 * Work out the relative path under which a module result file is saved/published.
 *
 * args.filename    - name of the file being considered
 * args.options     - module options, normalised through initOptions
 * args.publish_dir - directory used when options.publish_dir is not set
 * args.publish_id  - appended to the directory when options.publish_by_id is set
 *
 * Returns '<directory>/<filename>', or null when:
 *   - the filename ends with '.version.txt'
 *   - options.publish_files is a Map and no key is a suffix of the filename
 *   - options.publish_files is neither a Map nor null (e.g. false)
 */
def saveFiles(Map args) {
    // Files ending in '.version.txt' never get a publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def ioptions  = initOptions(args.options)
    def base_dirs = [ ioptions.publish_dir ?: args.publish_dir ]
    if (ioptions.publish_by_id) {
        base_dirs << args.publish_id
    }

    def rules = ioptions.publish_files
    if (rules instanceof Map) {
        // The first entry whose key is a suffix of the filename decides the sub-directory
        def hit = rules.find { rule -> args.filename.endsWith(rule.key) }
        if (hit == null) {
            return null
        }
        def target_dirs = base_dirs + [ hit.value ]
        return "${getPathFromList(target_dirs)}/$args.filename"
    }
    if (rules == null) {
        // No filter supplied: every file goes to the base directory
        return "${getPathFromList(base_dirs)}/$args.filename"
    }
    return null
}

32
software/multiqc/main.nf Normal file
View file

@ -0,0 +1,32 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
// Module options default to an empty map; callers override them with addParams( options: [...] )
params.options = [:]
def options    = initOptions(params.options)

/*
 * Run MultiQC over the staged input files.
 *
 * Input:  tuple of a meta map (meta.id is used for the tag and the publish id)
 *         and the files MultiQC should scan
 * Output: dir     - tuple of meta and the 'multiqc_data' directory
 *         html    - tuple of meta and the HTML report
 *         version - text file holding the MultiQC version
 */
process MULTIQC {
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: meta.id) }

    conda (params.enable_conda ? "bioconda::multiqc=1.9" : null)
    container "quay.io/biocontainers/multiqc:1.9--py_1"

    input:
    // Stage files under their original names: MultiQC recognises reports by
    // file name and ignores '*.html' files, so they must not be renamed
    tuple val(meta), path(multiqc_files)

    output:
    tuple val(meta), path("multiqc_data"), emit: dir
    tuple val(meta), path("*.html"), emit: html
    path "*.version.txt", emit: version

    script:
    def software = getSoftwareName(task.process)
    """
    multiqc -f $options.args .
    multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt
    """
}

67
software/multiqc/meta.yml Normal file
View file

@ -0,0 +1,67 @@
name: multiqc
description: Aggregate results from bioinformatics analyses across many samples into a single report
keywords:
- quality control
- qc
- adapters
- fastq
tools:
- multiqc:
description: |
MultiQC searches a given directory for analysis logs and compiles a HTML report.
It's a general use tool, perfect for summarising the output from numerous
bioinformatics tools.
homepage: https://multiqc.info/
documentation: https://multiqc.info/docs/
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- multiqc_files:
type: file
description: |
List of reports / files recognised by MultiQC, for example the zip
archives produced by FastQC.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: MultiQC report
pattern: "*.html"
- dir:
type: directory
description: Directory containing the parsed data files written by MultiQC
pattern: "multiqc_data"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@drpatelh"
- "@grst"
- "@ewels"
- "@FelixKrueger"

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

21
software/multiqc/test/main.nf Executable file
View file

@ -0,0 +1,21 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { MULTIQC } from '../main.nf' addParams( options: [ publish_dir:'test_single_end' ] )
/*
 * Test MultiQC on the FastQC report archives in the test input directory
 */
workflow test_single_end {
    // MultiQC reads FastQC results from the zip archives, not from FASTQ files
    def input = [ [ id:'test', single_end:true ], // meta map
                  [ file("${baseDir}/input/test_1_fastqc.zip", checkIfExists: true),
                    file("${baseDir}/input/test_2_fastqc.zip", checkIfExists: true) ] ]

    MULTIQC ( input )
}
// Unnamed entry workflow: runs the test_single_end workflow
workflow {
test_single_end()
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,20 @@
// Default parameter values
params {
outdir = "output/"
publish_dir_mode = "copy"
enable_conda = false
}
// Profiles selecting how the software is provided
profiles {
// Sets params.enable_conda to true
conda {
params.enable_conda = true
}
// Enables Docker and passes the current user and group ids to it via '-u'
docker {
docker.enabled = true
docker.runOptions = '-u \$(id -u):\$(id -g)'
}
// Enables Singularity with automatic mounts
singularity {
singularity.enabled = true
singularity.autoMounts = true
}
}

View file

@ -0,0 +1,105 @@
[2020-11-23 18:25:23,742] multiqc [DEBUG ] No MultiQC config found: /Users/eklavya/.pyenv/versions/miniconda3-latest/lib/python3.7/site-packages/multiqc_config.yaml
[2020-11-23 18:25:23,742] multiqc [DEBUG ] No MultiQC config found: /Users/eklavya/.multiqc_config.yaml
[2020-11-23 18:25:23,742] multiqc [DEBUG ] No MultiQC config found: multiqc_config.yaml
[2020-11-23 18:25:23,742] multiqc [DEBUG ] Command used: /Users/eklavya/.pyenv/versions/miniconda3-latest/bin/multiqc input/
[2020-11-23 18:25:25,158] multiqc [WARNING] MultiQC Version v1.9 now available!
[2020-11-23 18:25:25,158] multiqc [INFO ] This is MultiQC v1.8
[2020-11-23 18:25:25,158] multiqc [DEBUG ] Command : /Users/eklavya/.pyenv/versions/miniconda3-latest/bin/multiqc input/
[2020-11-23 18:25:25,158] multiqc [DEBUG ] Working dir : /Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test
[2020-11-23 18:25:25,158] multiqc [INFO ] Template : default
[2020-11-23 18:25:25,158] multiqc [DEBUG ] Running Python 3.7.3 (default, Mar 27 2019, 16:54:48) [Clang 4.0.1 (tags/RELEASE_401/final)]
[2020-11-23 18:25:25,159] multiqc [INFO ] Searching : /Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/input
[2020-11-23 18:25:25,159] multiqc [DEBUG ] Analysing modules: custom_content, conpair, peddy, methylQA, phantompeakqualtools, qualimap, preseq, quast, qorts, rna_seqc, rsem, rseqc, busco, goleft_indexcov, disambiguate, supernova, deeptools, sargasso, verifybamid, mirtrace, happy, homer, macs2, theta2, snpeff, gatk, htseq, bcftools, featureCounts, fgbio, dedup, damageprofiler, biobambam2, mtnucratio, picard, prokka, samblaster, samtools, sexdeterrmine, bamtools, jellyfish, vcftools, longranger, stacks, bbmap, bismark, biscuit, hicexplorer, hicup, hicpro, salmon, kallisto, slamdunk, star, hisat2, tophat, bowtie2, bowtie1, kat, leehom, adapterRemoval, clipandmerge, cutadapt, flexbar, trimmomatic, skewer, sortmerna, biobloomtools, fastq_screen, afterqc, fastp, fastqc, minionqc, mosdepth, clusterflow, bcl2fastq, interop, flash, seqyclean
[2020-11-23 18:25:25,159] multiqc [DEBUG ] Using temporary directory for creating report: /var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g
[2020-11-23 18:25:25,251] multiqc [DEBUG ] Ignoring file as matched an ignore pattern: test_1_fastqc.html
[2020-11-23 18:25:25,251] multiqc [DEBUG ] Ignoring file as matched an ignore pattern: test_2_fastqc.html
[2020-11-23 18:25:25,624] multiqc.plots.bargraph [DEBUG ] Using matplotlib version 3.1.1
[2020-11-23 18:25:25,625] multiqc.plots.linegraph [DEBUG ] Using matplotlib version 3.1.1
[2020-11-23 18:25:25,626] multiqc [DEBUG ] No samples found: custom_content
[2020-11-23 18:25:25,630] multiqc [DEBUG ] No samples found: conpair
[2020-11-23 18:25:25,634] multiqc [DEBUG ] No samples found: peddy
[2020-11-23 18:25:25,638] multiqc [DEBUG ] No samples found: methylQA
[2020-11-23 18:25:25,642] multiqc [DEBUG ] No samples found: phantompeakqualtools
[2020-11-23 18:25:25,647] multiqc [DEBUG ] No samples found: qualimap
[2020-11-23 18:25:25,651] multiqc [DEBUG ] No samples found: preseq
[2020-11-23 18:25:25,655] multiqc [DEBUG ] No samples found: quast
[2020-11-23 18:25:25,659] multiqc [DEBUG ] No samples found: qorts
[2020-11-23 18:25:25,663] multiqc [DEBUG ] No samples found: rna_seqc
[2020-11-23 18:25:25,666] multiqc [DEBUG ] No samples found: rsem
[2020-11-23 18:25:25,675] multiqc [DEBUG ] No samples found: rseqc
[2020-11-23 18:25:25,679] multiqc [DEBUG ] No samples found: busco
[2020-11-23 18:25:25,683] multiqc [DEBUG ] No samples found: goleft_indexcov
[2020-11-23 18:25:25,687] multiqc [DEBUG ] No samples found: disambiguate
[2020-11-23 18:25:25,691] multiqc [DEBUG ] No samples found: supernova
[2020-11-23 18:25:25,700] multiqc [DEBUG ] No samples found: deeptools
[2020-11-23 18:25:25,703] multiqc [DEBUG ] No samples found: sargasso
[2020-11-23 18:25:25,707] multiqc [DEBUG ] No samples found: verifybamid
[2020-11-23 18:25:25,711] multiqc [DEBUG ] No samples found: mirtrace
[2020-11-23 18:25:25,715] multiqc [DEBUG ] No samples found: happy
[2020-11-23 18:25:25,720] multiqc [DEBUG ] No samples found: homer
[2020-11-23 18:25:25,723] multiqc [DEBUG ] No samples found: macs2
[2020-11-23 18:25:25,727] multiqc [DEBUG ] No samples found: theta2
[2020-11-23 18:25:25,731] multiqc [DEBUG ] No samples found: snpeff
[2020-11-23 18:25:25,736] multiqc [DEBUG ] No samples found: gatk
[2020-11-23 18:25:25,741] multiqc [DEBUG ] No samples found: htseq
[2020-11-23 18:25:25,745] multiqc [DEBUG ] No samples found: bcftools
[2020-11-23 18:25:25,750] multiqc [DEBUG ] No samples found: featureCounts
[2020-11-23 18:25:25,754] multiqc [DEBUG ] No samples found: fgbio
[2020-11-23 18:25:25,758] multiqc [DEBUG ] No samples found: dedup
[2020-11-23 18:25:25,762] multiqc [DEBUG ] No samples found: damageprofiler
[2020-11-23 18:25:25,774] multiqc [DEBUG ] No samples found: biobambam2
[2020-11-23 18:25:25,778] multiqc [DEBUG ] No samples found: mtnucratio
[2020-11-23 18:25:25,781] multiqc [DEBUG ] No samples found: picard
[2020-11-23 18:25:25,785] multiqc [DEBUG ] No samples found: prokka
[2020-11-23 18:25:25,789] multiqc [DEBUG ] No samples found: samblaster
[2020-11-23 18:25:25,795] multiqc [DEBUG ] No samples found: samtools
[2020-11-23 18:25:25,799] multiqc [DEBUG ] No samples found: sexdeterrmine
[2020-11-23 18:25:25,803] multiqc [DEBUG ] No samples found: bamtools
[2020-11-23 18:25:25,806] multiqc [DEBUG ] No samples found: jellyfish
[2020-11-23 18:25:25,812] multiqc [DEBUG ] No samples found: vcftools
[2020-11-23 18:25:25,816] multiqc [DEBUG ] No samples found: longranger
[2020-11-23 18:25:25,820] multiqc [DEBUG ] No samples found: stacks
[2020-11-23 18:25:25,830] multiqc [DEBUG ] No samples found: bbmap
[2020-11-23 18:25:25,834] multiqc [DEBUG ] No samples found: bismark
[2020-11-23 18:25:25,838] multiqc [DEBUG ] No samples found: biscuit
[2020-11-23 18:25:25,842] multiqc [DEBUG ] No samples found: hicexplorer
[2020-11-23 18:25:25,846] multiqc [DEBUG ] No samples found: hicup
[2020-11-23 18:25:25,850] multiqc [DEBUG ] No samples found: hicpro
[2020-11-23 18:25:25,853] multiqc [DEBUG ] No samples found: salmon
[2020-11-23 18:25:25,857] multiqc [DEBUG ] No samples found: kallisto
[2020-11-23 18:25:25,861] multiqc [DEBUG ] No samples found: slamdunk
[2020-11-23 18:25:25,865] multiqc [DEBUG ] No samples found: star
[2020-11-23 18:25:25,868] multiqc [DEBUG ] No samples found: hisat2
[2020-11-23 18:25:25,872] multiqc [DEBUG ] No samples found: tophat
[2020-11-23 18:25:25,875] multiqc [DEBUG ] No samples found: bowtie2
[2020-11-23 18:25:25,879] multiqc [DEBUG ] No samples found: bowtie1
[2020-11-23 18:25:25,883] multiqc [DEBUG ] No samples found: kat
[2020-11-23 18:25:25,886] multiqc [DEBUG ] No samples found: leehom
[2020-11-23 18:25:25,890] multiqc [DEBUG ] No samples found: adapterRemoval
[2020-11-23 18:25:25,893] multiqc [DEBUG ] No samples found: clipandmerge
[2020-11-23 18:25:25,897] multiqc [DEBUG ] No samples found: cutadapt
[2020-11-23 18:25:25,901] multiqc [DEBUG ] No samples found: flexbar
[2020-11-23 18:25:25,904] multiqc [DEBUG ] No samples found: trimmomatic
[2020-11-23 18:25:25,908] multiqc [DEBUG ] No samples found: skewer
[2020-11-23 18:25:25,912] multiqc [DEBUG ] No samples found: sortmerna
[2020-11-23 18:25:25,915] multiqc [DEBUG ] No samples found: biobloomtools
[2020-11-23 18:25:25,919] multiqc [DEBUG ] No samples found: fastq_screen
[2020-11-23 18:25:25,922] multiqc [DEBUG ] No samples found: afterqc
[2020-11-23 18:25:25,926] multiqc [DEBUG ] No samples found: fastp
[2020-11-23 18:25:25,947] multiqc.modules.fastqc.fastqc [INFO ] Found 2 reports
[2020-11-23 18:25:26,023] multiqc [DEBUG ] No samples found: minionqc
[2020-11-23 18:25:26,027] multiqc [DEBUG ] No samples found: mosdepth
[2020-11-23 18:25:26,031] multiqc [DEBUG ] No samples found: clusterflow
[2020-11-23 18:25:26,035] multiqc [DEBUG ] No samples found: bcl2fastq
[2020-11-23 18:25:26,039] multiqc [DEBUG ] No samples found: interop
[2020-11-23 18:25:26,043] multiqc [DEBUG ] No samples found: flash
[2020-11-23 18:25:26,046] multiqc [DEBUG ] No samples found: seqyclean
[2020-11-23 18:25:26,050] multiqc [INFO ] Compressing plot data
[2020-11-23 18:25:26,086] multiqc [INFO ] Report : multiqc_report.html
[2020-11-23 18:25:26,086] multiqc [INFO ] Data : multiqc_data
[2020-11-23 18:25:26,087] multiqc [DEBUG ] Moving data file from '/var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g/multiqc_data/multiqc_fastqc.txt' to '/Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/multiqc_data'
[2020-11-23 18:25:26,087] multiqc [DEBUG ] Moving data file from '/var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g/multiqc_data/multiqc_qualimap_bamqc_genome_results.txt' to '/Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/multiqc_data'
[2020-11-23 18:25:26,087] multiqc [DEBUG ] Moving data file from '/var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g/multiqc_data/multiqc_general_stats.txt' to '/Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/multiqc_data'
[2020-11-23 18:25:26,087] multiqc [DEBUG ] Moving data file from '/var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g/multiqc_data/multiqc_sources.txt' to '/Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/multiqc_data'
[2020-11-23 18:25:26,088] multiqc [DEBUG ] Moving data file from '/var/folders/zp/63677vtx23d_b2_nd7mm92040000gn/T/tmph8bg8w5g/multiqc_data/multiqc_data.json' to '/Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/multiqc_data'
[2020-11-23 18:25:26,231] multiqc [INFO ] MultiQC complete

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
Sample Filename File type Encoding Total Sequences Sequences flagged as poor quality Sequence length %GC total_deduplicated_percentage avg_sequence_length basic_statistics per_base_sequence_quality per_tile_sequence_quality per_sequence_quality_scores per_base_sequence_content per_sequence_gc_content per_base_n_content sequence_length_distribution sequence_duplication_levels overrepresented_sequences adapter_content
test_1 test_1.fastq.gz Conventional base calls Sanger / Illumina 1.9 10000.0 0.0 76.0 44.0 91.72 76.0 pass pass pass pass fail pass pass pass pass warn pass
test_2 test_2.fastq.gz Conventional base calls Sanger / Illumina 1.9 10000.0 0.0 76.0 44.0 91.63 76.0 pass pass pass pass fail pass pass pass pass warn pass

View file

@ -0,0 +1,3 @@
Sample FastQC_mqc-generalstats-fastqc-percent_duplicates FastQC_mqc-generalstats-fastqc-percent_gc FastQC_mqc-generalstats-fastqc-avg_sequence_length FastQC_mqc-generalstats-fastqc-percent_fails FastQC_mqc-generalstats-fastqc-total_sequences
test_1 8.280000000000001 44.0 76.0 9.090909090909092 10000.0
test_2 8.370000000000005 44.0 76.0 9.090909090909092 10000.0

View file

@ -0,0 +1 @@
Sample

View file

@ -0,0 +1,3 @@
Module Section Sample Name Source
FastQC all_sections test_2 /Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/input/test_2_fastqc.zip
FastQC all_sections test_1 /Users/eklavya/projects/code/ORG_nf_core/modules/software/multiqc/test/input/test_1_fastqc.zip