diff --git a/software/hisat2/build/main.nf b/software/hisat2/build/main.nf index e74c8cae..ce8bfb26 100644 --- a/software/hisat2/build/main.nf +++ b/software/hisat2/build/main.nf @@ -9,6 +9,7 @@ def VERSION = '2.2.0' process HISAT2_BUILD { tag "$fasta" label 'process_high' + label 'process_high_memory' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) } @@ -26,7 +27,7 @@ process HISAT2_BUILD { path splicesites output: - path "hisat2", emit: index + path "hisat2" , emit: index path "*.version.txt", emit: version script: @@ -38,17 +39,18 @@ process HISAT2_BUILD { avail_mem = task.memory.toGiga() } - def extract_exons = '' def ss = '' def exon = '' - if (avail_mem > params.hisat_build_memory) { - log.info "[HISAT2 index build] Over ${params.hisat_build_memory} GB available, so using splice sites and exons in HISAT2 index" + def extract_exons = '' + def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 + if (avail_mem >= hisat2_build_memory) { + log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" ss = "--ss $splicesites" exon = "--exon ${gtf.baseName}.exons.txt" } else { - log.info "[HISAT2 index build] Less than ${params.hisat_build_memory} GB available, so NOT using splice sites and exons in HISAT2 index." - log.info "[HISAT2 index build] Use --hisat_build_memory [small number] to skip this check." + log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index." + log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check." } def software = getSoftwareName(task.process) diff --git a/software/preseq/lcextrap/main.nf b/software/preseq/lcextrap/main.nf index 0dd75eba..2f601e4f 100644 --- a/software/preseq/lcextrap/main.nf +++ b/software/preseq/lcextrap/main.nf @@ -12,11 +12,11 @@ process PRESEQ_LCEXTRAP { mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - conda (params.enable_conda ? "bioconda::preseq=2.0.3" : null) + conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/preseq:2.0.3--hf53bd2b_3" + container "https://depot.galaxyproject.org/singularity/preseq:3.1.2--h06ef8b0_1" } else { - container "quay.io/biocontainers/preseq:2.0.3--hf53bd2b_3" + container "quay.io/biocontainers/preseq:3.1.2--h06ef8b0_1" } input: diff --git a/software/preseq/lcextrap/meta.yml b/software/preseq/lcextrap/meta.yml new file mode 100755 index 00000000..d1716231 --- /dev/null +++ b/software/preseq/lcextrap/meta.yml @@ -0,0 +1,47 @@ +name: preseq_lcextrap +description: Software for predicting library complexity and genome coverage in high-throughput sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: None + tool_dev_url: None + doi: "" + licence: ['GPL'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - ccurve: + type: file + description: File containing output of Preseq lcextrap + pattern: "*.{ccurve.txt}" + - log: + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + +authors: + - "@drpatelh" diff --git a/software/samtools/index/main.nf b/software/samtools/index/main.nf index a4762743..2f323c81 100644 --- a/software/samtools/index/main.nf +++ b/software/samtools/index/main.nf @@ -22,13 +22,14 @@ process SAMTOOLS_INDEX { tuple val(meta), path(bam) output: - tuple val(meta), path("*.bai"), emit: bai + tuple val(meta), path("*.bai"), optional:true, emit: bai + tuple val(meta), path("*.csi"), optional:true, emit: csi path "*.version.txt" , emit: version script: def software = getSoftwareName(task.process) """ - samtools index $bam + samtools index $options.args $bam echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt """ } diff --git a/software/samtools/index/meta.yml b/software/samtools/index/meta.yml index 03936e59..5d076e3b 100644 --- a/software/samtools/index/meta.yml +++ b/software/samtools/index/meta.yml @@ -34,6 +34,10 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" - version: type: file description: File containing software version diff --git a/software/star/genomegenerate/main.nf b/software/star/genomegenerate/main.nf index fb1feea8..b440b415 100644 --- a/software/star/genomegenerate/main.nf +++ b/software/star/genomegenerate/main.nf @@ -12,11 +12,11 @@ process STAR_GENOMEGENERATE { saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) } // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) + conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/star:2.6.1d--0" + container "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" } else { - container "quay.io/biocontainers/star:2.6.1d--0" + container "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" } input: @@ -28,19 +28,40 @@ process STAR_GENOMEGENERATE { path "*.version.txt", emit: version script: - def software = getSoftwareName(task.process) - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' - """ - mkdir star - STAR \\ - --runMode genomeGenerate \\ - --genomeDir star/ \\ - --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ - --runThreadN $task.cpus \\ - $memory \\ - $options.args + def software = getSoftwareName(task.process) + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = options.args.tokenize() + if (args.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $options.args - STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt - """ + STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $options.args + + STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt + """ + } } diff --git a/tests/software/preseq/lcextrap/test.yml b/tests/software/preseq/lcextrap/test.yml index 0de3dfd7..d70f1921 100644 --- a/tests/software/preseq/lcextrap/test.yml +++ b/tests/software/preseq/lcextrap/test.yml @@ -5,7 +5,7 @@ - preseq/lcextrap files: - path: output/preseq/test.ccurve.txt - md5sum: 76ae04c8eaf19c94e3210bb69da38498 + md5sum: 1fa5cdd601079329618f61660bee00de - path: output/preseq/test.command.log - name: preseq lcextrap paired-end @@ -15,5 +15,5 @@ - preseq/lcextrap files: - path: output/preseq/test.ccurve.txt - md5sum: 2836d2fabd2213f097fd7063db550276 + md5sum: 10e5ea860e87fb6f5dc10f4f20c62040 - path: output/preseq/test.command.log diff --git a/tests/software/samtools/index/main.nf b/tests/software/samtools/index/main.nf index 815bd239..d5a29e80 100644 --- a/tests/software/samtools/index/main.nf +++ b/tests/software/samtools/index/main.nf @@ -2,12 +2,21 @@ nextflow.enable.dsl = 2 -include { SAMTOOLS_INDEX } from '../../../../software/samtools/index/main.nf' addParams( options: [:] ) +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BAI } from '../../../../software/samtools/index/main.nf' addParams( options: [:] ) +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CSI } from '../../../../software/samtools/index/main.nf' addParams( options: [args:'-c'] ) -workflow test_samtools_index { +workflow test_samtools_index_bai { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - SAMTOOLS_INDEX ( input ) + SAMTOOLS_INDEX_BAI ( input ) +} + +workflow test_samtools_index_csi { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + + SAMTOOLS_INDEX_CSI ( input ) } diff --git a/tests/software/samtools/index/test.yml b/tests/software/samtools/index/test.yml index 79b541b4..6ff52e42 100644 --- a/tests/software/samtools/index/test.yml +++ b/tests/software/samtools/index/test.yml @@ -1,8 +1,17 @@ -- name: samtools index - command: nextflow run tests/software/samtools/index -entry test_samtools_index -c tests/config/nextflow.config +- name: samtools index bai + command: nextflow run tests/software/samtools/index -entry test_samtools_index_bai -c tests/config/nextflow.config tags: - samtools - samtools/index files: - path: output/samtools/test_paired_end.sorted.bam.bai md5sum: 704c10dd1326482448ca3073fdebc2f4 + +- name: samtools index csi + command: nextflow run tests/software/samtools/index -entry test_samtools_index_csi -c tests/config/nextflow.config + tags: + - samtools + - samtools/index + files: + - path: output/samtools/test_paired_end.sorted.bam.csi + md5sum: 3dd9e3ed959fca075b88bb8dc3cf7dbd diff --git a/tests/software/star/genomegenerate/main.nf b/tests/software/star/genomegenerate/main.nf index b459a39d..12d352c1 100644 --- a/tests/software/star/genomegenerate/main.nf +++ b/tests/software/star/genomegenerate/main.nf @@ -2,11 +2,11 @@ nextflow.enable.dsl = 2 -include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [args: '--genomeSAindexNbases 9'] ) +include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [:] ) workflow test_star_genomegenerate { - fasta = file("${launchDir}/tests/data/generic/fasta/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true) - gtf = file("${launchDir}/tests/data/generic/gtf/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) STAR_GENOMEGENERATE ( fasta, gtf ) }