Update salmon/quant to provide '--libType A' option (#536)

* Update salmon/quant to provide '--libType A' option

* Modify lib_type description to be a string

* Update tests

* lib_type can be explicitly overridden

* Update main.nf

* Update main.nf

* Update main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Jose Espinosa-Carrasco 2021-06-17 15:13:47 +02:00 committed by GitHub
parent c8168bc351
commit e621f78757
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 93 additions and 8 deletions

View file

@ -24,6 +24,7 @@ process SALMON_QUANT {
path gtf path gtf
path transcript_fasta path transcript_fasta
val alignment_mode val alignment_mode
val lib_type
output: output:
tuple val(meta), path("${prefix}"), emit: results tuple val(meta), path("${prefix}"), emit: results
@ -40,12 +41,27 @@ process SALMON_QUANT {
input_reads = "-a $reads" input_reads = "-a $reads"
} }
def strandedness = meta.single_end ? 'U' : 'IU' def strandedness_opts = [
'A', 'U', 'SF', 'SR',
'IS', 'IU' , 'ISF', 'ISR',
'OS', 'OU' , 'OSF', 'OSR',
'MS', 'MU' , 'MSF', 'MSR'
]
def strandedness = 'A'
if (lib_type) {
if (strandedness_opts.contains(lib_type)) {
strandedness = lib_type
} else {
log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'."
}
} else {
strandedness = meta.single_end ? 'U' : 'IU'
if (meta.strandedness == 'forward') { if (meta.strandedness == 'forward') {
strandedness = meta.single_end ? 'SF' : 'ISF' strandedness = meta.single_end ? 'SF' : 'ISF'
} else if (meta.strandedness == 'reverse') { } else if (meta.strandedness == 'reverse') {
strandedness = meta.single_end ? 'SR' : 'ISR' strandedness = meta.single_end ? 'SR' : 'ISR'
} }
}
""" """
salmon quant \\ salmon quant \\
--geneMap $gtf \\ --geneMap $gtf \\

View file

@ -35,6 +35,10 @@ input:
- alignment_mode: - alignment_mode:
type: boolean type: boolean
description: whether to run salmon in alignment mode description: whether to run salmon in alignment mode
- lib_type:
type: string
description: |
Override library type inferred based on strandedness defined in meta object
output: output:
- sample_output: - sample_output:

View file

@ -15,7 +15,7 @@ workflow test_salmon_quant_single_end {
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
SALMON_INDEX ( genome_fasta, transcript_fasta ) SALMON_INDEX ( genome_fasta, transcript_fasta )
SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false ) SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false, '' )
} }
@ -30,6 +30,21 @@ workflow test_salmon_quant_paired_end {
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
SALMON_INDEX ( genome_fasta, transcript_fasta ) SALMON_INDEX ( genome_fasta, transcript_fasta )
SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false ) SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false, '' )
} }
// Test workflow: builds a Salmon index and then runs SALMON_QUANT on a single-end
// sample, passing 'A' as the final (lib_type) argument instead of the empty string
// used by the other test workflows.
workflow test_salmon_quant_single_end_lib_type_A {
input = [ [ id:'test', single_end:true ], // meta map
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
]
// Reference files are looked up in params.test_data; checkIfExists is set on each lookup.
genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
transcript_fasta = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true)
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
SALMON_INDEX ( genome_fasta, transcript_fasta )
// Positional arguments: reads tuple, index, gtf, transcript fasta, alignment_mode (false), lib_type ('A').
SALMON_QUANT ( input, SALMON_INDEX.out.index, gtf, transcript_fasta, false, 'A' )
}

View file

@ -95,7 +95,57 @@
md5sum: 8d1970505b2b08ca0eb5ff7722b48cde md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
- path: ./output/index/salmon/ctg_offsets.bin - path: ./output/index/salmon/ctg_offsets.bin
md5sum: 27a76542337df436436e66017f66dd25 md5sum: 27a76542337df436436e66017f66dd25
- path: ./output/index/salmon/rank.bin - path: ./output/index/salmon/rank.bin
md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71 md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
- path: ./output/index/salmon/pos.bin - path: ./output/index/salmon/pos.bin
- path: ./output/index/salmon/seq.bin - path: ./output/index/salmon/seq.bin
- name: salmon quant test_salmon_quant_single_end_lib_type_A
command: nextflow run tests/software/salmon/quant -entry test_salmon_quant_single_end_lib_type_A -c tests/config/nextflow.config
tags:
- salmon/quant
- salmon
files:
- path: ./output/salmon/test/cmd_info.json
- path: output/salmon/test/quant.sf
md5sum: 687368b9963874c1797d210310b38516
- path: ./output/salmon/test/lib_format_counts.json
- path: ./output/salmon/test/quant.genes.sf
md5sum: ad4d31437f06db49b2436abeec29c78e
- path: ./output/salmon/test/logs/salmon_quant.log
- path: output/salmon/test/aux_info/expected_bias.gz
md5sum: 24ee10af39b41ecf4f4e08faaaf537ee
- path: output/salmon/test/aux_info/observed_bias_3p.gz
md5sum: ef13c06a538e9c34ca9f84212c82f44e
- path: ./output/salmon/test/aux_info/meta_info.json
- path: ./output/salmon/test/aux_info/fld.gz
- path: output/salmon/test/aux_info/ambig_info.tsv
md5sum: 2ee3dc3080ad7222e0687481e7a1ee03
- path: ./output/salmon/test/aux_info/observed_bias.gz
md5sum: ef13c06a538e9c34ca9f84212c82f44e
- path: output/salmon/test/libParams/flenDist.txt
md5sum: 2de170bdc9f6fd237d286429b292bb28
- path: ./output/index/salmon/ref_indexing.log
- path: output/index/salmon/refseq.bin
md5sum: 79c4ddf34be3a98d5a7b9d153629a6f7
- path: output/index/salmon/versionInfo.json
md5sum: 204865f645102587c4953fccb256797c
- path: output/index/salmon/complete_ref_lens.bin
md5sum: f57562f1fca3ae7b133f895ae13c3d08
- path: output/index/salmon/mphf.bin
md5sum: 53669a47610e33e031faafd32703b714
- path: output/index/salmon/duplicate_clusters.tsv
md5sum: 51b5292e3a874119c0e1aa566e95d70c
- path: output/index/salmon/reflengths.bin
md5sum: f57562f1fca3ae7b133f895ae13c3d08
- path: output/index/salmon/info.json
md5sum: 61ff4d3471134c280668355ddd39e99f
- path: output/index/salmon/refAccumLengths.bin
md5sum: 8d1970505b2b08ca0eb5ff7722b48cde
- path: output/index/salmon/ctg_offsets.bin
md5sum: 27a76542337df436436e66017f66dd25
- path: output/index/salmon/rank.bin
md5sum: 3f34dca1ec26cdf89a6d19b1d1c07e71
- path: ./output/index/salmon/pos.bin
- path: ./output/index/salmon/seq.bin