mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-04 21:42:08 -05:00
Update modules required for rnaseq pipeline (#449)
* Update HISAT2 build module
* Bump preseq version
* Fix tests
* Add meta.yml for preseq to fix linting
* Auto-detect --genomeSAindexNbases for smaller genomes
* Add placeholder to use human data for the tests
* Add CSI output option to samtools/index
* Fix samtools/index tests
This commit is contained in:
parent defaca4f1b
commit d1c6082a66
10 changed files with 131 additions and 38 deletions
|
@ -9,6 +9,7 @@ def VERSION = '2.2.0'
|
||||||
process HISAT2_BUILD {
|
process HISAT2_BUILD {
|
||||||
tag "$fasta"
|
tag "$fasta"
|
||||||
label 'process_high'
|
label 'process_high'
|
||||||
|
label 'process_high_memory'
|
||||||
publishDir "${params.outdir}",
|
publishDir "${params.outdir}",
|
||||||
mode: params.publish_dir_mode,
|
mode: params.publish_dir_mode,
|
||||||
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }
|
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }
|
||||||
|
@ -26,7 +27,7 @@ process HISAT2_BUILD {
|
||||||
path splicesites
|
path splicesites
|
||||||
|
|
||||||
output:
|
output:
|
||||||
path "hisat2", emit: index
|
path "hisat2" , emit: index
|
||||||
path "*.version.txt", emit: version
|
path "*.version.txt", emit: version
|
||||||
|
|
||||||
script:
|
script:
|
||||||
|
@ -38,17 +39,18 @@ process HISAT2_BUILD {
|
||||||
avail_mem = task.memory.toGiga()
|
avail_mem = task.memory.toGiga()
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_exons = ''
|
|
||||||
def ss = ''
|
def ss = ''
|
||||||
def exon = ''
|
def exon = ''
|
||||||
if (avail_mem > params.hisat_build_memory) {
|
def extract_exons = ''
|
||||||
log.info "[HISAT2 index build] Over ${params.hisat_build_memory} GB available, so using splice sites and exons in HISAT2 index"
|
def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0
|
||||||
|
if (avail_mem >= hisat2_build_memory) {
|
||||||
|
log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index"
|
||||||
extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt"
|
extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt"
|
||||||
ss = "--ss $splicesites"
|
ss = "--ss $splicesites"
|
||||||
exon = "--exon ${gtf.baseName}.exons.txt"
|
exon = "--exon ${gtf.baseName}.exons.txt"
|
||||||
} else {
|
} else {
|
||||||
log.info "[HISAT2 index build] Less than ${params.hisat_build_memory} GB available, so NOT using splice sites and exons in HISAT2 index."
|
log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index."
|
||||||
log.info "[HISAT2 index build] Use --hisat_build_memory [small number] to skip this check."
|
log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
|
||||||
}
|
}
|
||||||
|
|
||||||
def software = getSoftwareName(task.process)
|
def software = getSoftwareName(task.process)
|
||||||
|
|
|
@ -12,11 +12,11 @@ process PRESEQ_LCEXTRAP {
|
||||||
mode: params.publish_dir_mode,
|
mode: params.publish_dir_mode,
|
||||||
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
|
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
|
||||||
|
|
||||||
conda (params.enable_conda ? "bioconda::preseq=2.0.3" : null)
|
conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null)
|
||||||
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
||||||
container "https://depot.galaxyproject.org/singularity/preseq:2.0.3--hf53bd2b_3"
|
container "https://depot.galaxyproject.org/singularity/preseq:3.1.2--h06ef8b0_1"
|
||||||
} else {
|
} else {
|
||||||
container "quay.io/biocontainers/preseq:2.0.3--hf53bd2b_3"
|
container "quay.io/biocontainers/preseq:3.1.2--h06ef8b0_1"
|
||||||
}
|
}
|
||||||
|
|
||||||
input:
|
input:
|
||||||
|
|
47
software/preseq/lcextrap/meta.yml
Executable file
47
software/preseq/lcextrap/meta.yml
Executable file
|
@ -0,0 +1,47 @@
|
||||||
|
name: preseq_lcextrap
|
||||||
|
description: Software for predicting library complexity and genome coverage in high-throughput sequencing
|
||||||
|
keywords:
|
||||||
|
- preseq
|
||||||
|
- library
|
||||||
|
- complexity
|
||||||
|
tools:
|
||||||
|
- preseq:
|
||||||
|
description: Software for predicting library complexity and genome coverage in high-throughput sequencing
|
||||||
|
homepage: http://smithlabresearch.org/software/preseq/
|
||||||
|
documentation: None
|
||||||
|
tool_dev_url: None
|
||||||
|
doi: ""
|
||||||
|
licence: ['GPL']
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- bam:
|
||||||
|
type: file
|
||||||
|
description: BAM/CRAM/SAM file
|
||||||
|
pattern: "*.{bam,cram,sam}"
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- version:
|
||||||
|
type: file
|
||||||
|
description: File containing software version
|
||||||
|
pattern: "*.{version.txt}"
|
||||||
|
- ccurve:
|
||||||
|
type: file
|
||||||
|
description: File containing output of Preseq lcextrap
|
||||||
|
pattern: "*.{ccurve.txt}"
|
||||||
|
- log:
|
||||||
|
type: file
|
||||||
|
description: Log file containing stderr produced by Preseq
|
||||||
|
pattern: "*.{log}"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@drpatelh"
|
|
@ -22,13 +22,14 @@ process SAMTOOLS_INDEX {
|
||||||
tuple val(meta), path(bam)
|
tuple val(meta), path(bam)
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path("*.bai"), emit: bai
|
tuple val(meta), path("*.bai"), optional:true, emit: bai
|
||||||
|
tuple val(meta), path("*.csi"), optional:true, emit: csi
|
||||||
path "*.version.txt" , emit: version
|
path "*.version.txt" , emit: version
|
||||||
|
|
||||||
script:
|
script:
|
||||||
def software = getSoftwareName(task.process)
|
def software = getSoftwareName(task.process)
|
||||||
"""
|
"""
|
||||||
samtools index $bam
|
samtools index $options.args $bam
|
||||||
echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
|
echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,10 @@ output:
|
||||||
type: file
|
type: file
|
||||||
description: BAM/CRAM/SAM index file
|
description: BAM/CRAM/SAM index file
|
||||||
pattern: "*.{bai,crai,sai}"
|
pattern: "*.{bai,crai,sai}"
|
||||||
|
- csi:
|
||||||
|
type: file
|
||||||
|
description: CSI index file
|
||||||
|
pattern: "*.{csi}"
|
||||||
- version:
|
- version:
|
||||||
type: file
|
type: file
|
||||||
description: File containing software version
|
description: File containing software version
|
||||||
|
|
|
@ -12,11 +12,11 @@ process STAR_GENOMEGENERATE {
|
||||||
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }
|
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }
|
||||||
|
|
||||||
// Note: 2.7X indices incompatible with AWS iGenomes.
|
// Note: 2.7X indices incompatible with AWS iGenomes.
|
||||||
conda (params.enable_conda ? "bioconda::star=2.6.1d" : null)
|
conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null)
|
||||||
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
|
||||||
container "https://depot.galaxyproject.org/singularity/star:2.6.1d--0"
|
container "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0"
|
||||||
} else {
|
} else {
|
||||||
container "quay.io/biocontainers/star:2.6.1d--0"
|
container "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0"
|
||||||
}
|
}
|
||||||
|
|
||||||
input:
|
input:
|
||||||
|
@ -30,6 +30,8 @@ process STAR_GENOMEGENERATE {
|
||||||
script:
|
script:
|
||||||
def software = getSoftwareName(task.process)
|
def software = getSoftwareName(task.process)
|
||||||
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
|
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
|
||||||
|
def args = options.args.tokenize()
|
||||||
|
if (args.contains('--genomeSAindexNbases')) {
|
||||||
"""
|
"""
|
||||||
mkdir star
|
mkdir star
|
||||||
STAR \\
|
STAR \\
|
||||||
|
@ -43,4 +45,23 @@ process STAR_GENOMEGENERATE {
|
||||||
|
|
||||||
STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
|
STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
|
||||||
"""
|
"""
|
||||||
|
} else {
|
||||||
|
"""
|
||||||
|
samtools faidx $fasta
|
||||||
|
NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai`
|
||||||
|
|
||||||
|
mkdir star
|
||||||
|
STAR \\
|
||||||
|
--runMode genomeGenerate \\
|
||||||
|
--genomeDir star/ \\
|
||||||
|
--genomeFastaFiles $fasta \\
|
||||||
|
--sjdbGTFfile $gtf \\
|
||||||
|
--runThreadN $task.cpus \\
|
||||||
|
--genomeSAindexNbases \$NUM_BASES \\
|
||||||
|
$memory \\
|
||||||
|
$options.args
|
||||||
|
|
||||||
|
STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
|
||||||
|
"""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
- preseq/lcextrap
|
- preseq/lcextrap
|
||||||
files:
|
files:
|
||||||
- path: output/preseq/test.ccurve.txt
|
- path: output/preseq/test.ccurve.txt
|
||||||
md5sum: 76ae04c8eaf19c94e3210bb69da38498
|
md5sum: 1fa5cdd601079329618f61660bee00de
|
||||||
- path: output/preseq/test.command.log
|
- path: output/preseq/test.command.log
|
||||||
|
|
||||||
- name: preseq lcextrap paired-end
|
- name: preseq lcextrap paired-end
|
||||||
|
@ -15,5 +15,5 @@
|
||||||
- preseq/lcextrap
|
- preseq/lcextrap
|
||||||
files:
|
files:
|
||||||
- path: output/preseq/test.ccurve.txt
|
- path: output/preseq/test.ccurve.txt
|
||||||
md5sum: 2836d2fabd2213f097fd7063db550276
|
md5sum: 10e5ea860e87fb6f5dc10f4f20c62040
|
||||||
- path: output/preseq/test.command.log
|
- path: output/preseq/test.command.log
|
||||||
|
|
|
@ -2,12 +2,21 @@
|
||||||
|
|
||||||
nextflow.enable.dsl = 2
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
include { SAMTOOLS_INDEX } from '../../../../software/samtools/index/main.nf' addParams( options: [:] )
|
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BAI } from '../../../../software/samtools/index/main.nf' addParams( options: [:] )
|
||||||
|
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CSI } from '../../../../software/samtools/index/main.nf' addParams( options: [args:'-c'] )
|
||||||
|
|
||||||
workflow test_samtools_index {
|
workflow test_samtools_index_bai {
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
]
|
]
|
||||||
|
|
||||||
SAMTOOLS_INDEX ( input )
|
SAMTOOLS_INDEX_BAI ( input )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_samtools_index_csi {
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
|
||||||
|
]
|
||||||
|
|
||||||
|
SAMTOOLS_INDEX_CSI ( input )
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,17 @@
|
||||||
- name: samtools index
|
- name: samtools index bai
|
||||||
command: nextflow run tests/software/samtools/index -entry test_samtools_index -c tests/config/nextflow.config
|
command: nextflow run tests/software/samtools/index -entry test_samtools_index_bai -c tests/config/nextflow.config
|
||||||
tags:
|
tags:
|
||||||
- samtools
|
- samtools
|
||||||
- samtools/index
|
- samtools/index
|
||||||
files:
|
files:
|
||||||
- path: output/samtools/test_paired_end.sorted.bam.bai
|
- path: output/samtools/test_paired_end.sorted.bam.bai
|
||||||
md5sum: 704c10dd1326482448ca3073fdebc2f4
|
md5sum: 704c10dd1326482448ca3073fdebc2f4
|
||||||
|
|
||||||
|
- name: samtools index csi
|
||||||
|
command: nextflow run tests/software/samtools/index -entry test_samtools_index_csi -c tests/config/nextflow.config
|
||||||
|
tags:
|
||||||
|
- samtools
|
||||||
|
- samtools/index
|
||||||
|
files:
|
||||||
|
- path: output/samtools/test_paired_end.sorted.bam.csi
|
||||||
|
md5sum: 3dd9e3ed959fca075b88bb8dc3cf7dbd
|
||||||
|
|
|
@ -2,11 +2,11 @@
|
||||||
|
|
||||||
nextflow.enable.dsl = 2
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [args: '--genomeSAindexNbases 9'] )
|
include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [:] )
|
||||||
|
|
||||||
workflow test_star_genomegenerate {
|
workflow test_star_genomegenerate {
|
||||||
fasta = file("${launchDir}/tests/data/generic/fasta/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true)
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
gtf = file("${launchDir}/tests/data/generic/gtf/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true)
|
gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
|
||||||
|
|
||||||
STAR_GENOMEGENERATE ( fasta, gtf )
|
STAR_GENOMEGENERATE ( fasta, gtf )
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue