fix: reduce number of required input files for damage profiler (#612)

* Reduce number of required input files for damage profiler

* Remove debugging

* Add optional species list file.

* Working, pending test-dataset update

* Add genome header to config
This commit is contained in:
James A. Fellows Yates 2021-10-20 10:02:30 +02:00 committed by GitHub
parent 4e9e732b76
commit 97fe899f79
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 144 additions and 35 deletions

View file

@ -22,25 +22,30 @@ process DAMAGEPROFILER {
tuple val(meta), path(bam)
path fasta
path fai
path specieslist
output:
tuple val(meta), path("${prefix}"), emit: results
path "versions.yml" , emit: versions
script:
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def software = getSoftwareName(task.process)
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def reference = fasta ? "-r $fasta" : ""
def species_list = specieslist ? "-sf $specieslist" : ""
"""
damageprofiler \\
-i $bam \\
-r $fasta \\
-o $prefix/ \\
$options.args
-i $bam \\
-o $prefix/ \\
$options.args \\
$reference \\
$species_list
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(damageprofiler -v | sed 's/^DamageProfiler v//')
END_VERSIONS
"""
}

View file

@ -32,12 +32,16 @@ input:
pattern: "*.{bam,cram,sam}"
- fasta:
type: file
description: FASTA reference file
description: OPTIONAL FASTA reference file
pattern: "*.{fasta,fna,fa}"
- fai:
type: file
description: FASTA index file from samtools faidx
description: OPTIONAL FASTA index file from samtools faidx
pattern: "*.{fai}"
- specieslist:
type: file
description: OPTIONAL text file with list of target reference headers
pattern: "*.{txt}"
output:
- versions:

View file

@ -104,6 +104,7 @@ params {
genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes"
genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed"
genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
@ -119,18 +120,19 @@ params {
repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json"
}
'illumina' {
test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
test_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam"
test_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai"
test_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam"
test_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam"
test_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam"
test_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt"
test_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam"
test_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam"
test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
test_paired_end_markduplicates_sorted_referencesn_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt"
test_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam"
test_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai"
test_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam"
test_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam"
test_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam"
test_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt"
test_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam"
test_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam"
test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"
test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai"

View file

@ -6,10 +6,34 @@ include { DAMAGEPROFILER } from '../../../modules/damageprofiler/main.nf' addPar
workflow test_damageprofiler {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true) ] ]
fasta = []
fai = []
species_list = []
DAMAGEPROFILER ( input, fasta, fai )
DAMAGEPROFILER ( input, fasta, fai, species_list )
}
workflow test_damageprofiler_reference {
    // Exercise DAMAGEPROFILER with a reference FASTA and its index supplied,
    // and an empty list in the species-list slot.
    bam_file = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true)
    input    = [ [ id:'test', single_end:false ], [ bam_file ] ] // [ meta map, [ bam ] ]

    // Reference inputs come from the 'genome' section of the test-data map.
    genome_data = params.test_data['homo_sapiens']['genome']
    fasta       = file(genome_data['genome_fasta'], checkIfExists: true)
    fai         = file(genome_data['genome_fasta_fai'], checkIfExists: true)

    // Fourth argument (species list) is passed as an empty list.
    DAMAGEPROFILER ( input, fasta, fai, [] )
}
workflow test_damageprofiler_specieslist {
    // Exercise DAMAGEPROFILER with only a species-list file supplied;
    // the FASTA and FASTA-index slots are passed as empty lists.
    bam_file = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true)
    input    = [ [ id:'test', single_end:false ], [ bam_file ] ] // [ meta map, [ bam ] ]

    // The species list is the 'genome_header' entry of the test-data map.
    species_list = file(params.test_data['homo_sapiens']['genome']['genome_header'], checkIfExists: true)

    // Second and third arguments (fasta, fai) are empty lists.
    DAMAGEPROFILER ( input, [], [], species_list )
}

View file

@ -4,13 +4,13 @@
- damageprofiler
files:
- path: output/damageprofiler/test/3p_freq_misincorporations.txt
md5sum: da4cac90c78899a7cb6d72d415392b49
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
- path: output/damageprofiler/test/3pGtoA_freq.txt
md5sum: 8dab75d51a4b943b501d0995169c767f
md5sum: 61c903b1504ed7d7182570dfc75e4498
- path: output/damageprofiler/test/5pCtoT_freq.txt
md5sum: fcc48ee5f72edff930d627c8bfdd8a5b
md5sum: 15a75b60ee519b61ce04a83fe3afe855
- path: output/damageprofiler/test/5p_freq_misincorporations.txt
md5sum: 54665474f5ef17dcc268567e5eaa7d86
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
- path: output/damageprofiler/test/DamagePlot_five_prime.svg
- path: output/damageprofiler/test/DamagePlot.pdf
- path: output/damageprofiler/test/DamagePlot_three_prime.svg
@ -18,19 +18,93 @@
contains:
- "FINISHED SUCCESSFULLY"
- path: output/damageprofiler/test/dmgprof.json
md5sum: 98499024c7e937896e481f2d3cfbdd3e
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
- path: output/damageprofiler/test/DNA_comp_genome.txt
md5sum: f91e70760d91a1193a27e360aaddf2fd
md5sum: fea48af1ecf491b439d36d4a919473df
- path: output/damageprofiler/test/DNA_composition_sample.txt
md5sum: 1257eb3eb42484647bfba2151f9ef04f
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
- path: output/damageprofiler/test/edit_distance.pdf
- path: output/damageprofiler/test/edit_distance.svg
- path: output/damageprofiler/test/editDistance.txt
md5sum: af2d2f4a99058ec56eae88ec27779e38
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
- path: output/damageprofiler/test/Length_plot_combined_data.svg
- path: output/damageprofiler/test/Length_plot_forward_reverse_separated.svg
- path: output/damageprofiler/test/Length_plot.pdf
- path: output/damageprofiler/test/lgdistribution.txt
md5sum: c5d029bf3a92b613310ee23f47d94981
md5sum: df2e19195185ea9ee05e8e84b2948f36
- path: output/damageprofiler/test/misincorporation.txt
md5sum: 3aa6dd749010a492d92a815a83c196a8
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c
# Test case for the `test_damageprofiler_reference` entry workflow (see `-entry` in the command below).
- name: damageprofiler_reference
command: nextflow run ./tests/modules/damageprofiler -entry test_damageprofiler_reference -c tests/config/nextflow.config -dump-channels
tags:
- damageprofiler
# Expected outputs, all under output/damageprofiler/test/.
# Text and JSON outputs carry an md5sum; SVG and PDF outputs are listed by path only.
files:
- path: output/damageprofiler/test/3p_freq_misincorporations.txt
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
- path: output/damageprofiler/test/3pGtoA_freq.txt
md5sum: 61c903b1504ed7d7182570dfc75e4498
- path: output/damageprofiler/test/5pCtoT_freq.txt
md5sum: 15a75b60ee519b61ce04a83fe3afe855
- path: output/damageprofiler/test/5p_freq_misincorporations.txt
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
- path: output/damageprofiler/test/DamagePlot_five_prime.svg
- path: output/damageprofiler/test/DamagePlot.pdf
- path: output/damageprofiler/test/DamagePlot_three_prime.svg
# The log has no checksum; it lists an expected `contains` string instead.
- path: output/damageprofiler/test/DamageProfiler.log
contains:
- "FINISHED SUCCESSFULLY"
- path: output/damageprofiler/test/dmgprof.json
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
- path: output/damageprofiler/test/DNA_comp_genome.txt
md5sum: fea48af1ecf491b439d36d4a919473df
- path: output/damageprofiler/test/DNA_composition_sample.txt
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
- path: output/damageprofiler/test/edit_distance.pdf
- path: output/damageprofiler/test/edit_distance.svg
- path: output/damageprofiler/test/editDistance.txt
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
- path: output/damageprofiler/test/Length_plot_combined_data.svg
- path: output/damageprofiler/test/Length_plot_forward_reverse_separated.svg
- path: output/damageprofiler/test/Length_plot.pdf
- path: output/damageprofiler/test/lgdistribution.txt
md5sum: df2e19195185ea9ee05e8e84b2948f36
- path: output/damageprofiler/test/misincorporation.txt
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c
# Test case for the `test_damageprofiler_specieslist` entry workflow (see `-entry` in the command below).
- name: damageprofiler_specieslist
command: nextflow run ./tests/modules/damageprofiler -entry test_damageprofiler_specieslist -c tests/config/nextflow.config -dump-channels
tags:
- damageprofiler
# Expected outputs. Result files are listed under a `chr22/` subdirectory of
# output/damageprofiler/test/, while DamageProfiler.log sits directly in test/.
# Text and JSON outputs carry an md5sum; SVG and PDF outputs are listed by path only.
files:
- path: output/damageprofiler/test/chr22/3p_freq_misincorporations.txt
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
- path: output/damageprofiler/test/chr22/3pGtoA_freq.txt
md5sum: 61c903b1504ed7d7182570dfc75e4498
- path: output/damageprofiler/test/chr22/5pCtoT_freq.txt
md5sum: 15a75b60ee519b61ce04a83fe3afe855
- path: output/damageprofiler/test/chr22/5p_freq_misincorporations.txt
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
- path: output/damageprofiler/test/chr22/DamagePlot_five_prime.svg
- path: output/damageprofiler/test/chr22/DamagePlot.pdf
- path: output/damageprofiler/test/chr22/DamagePlot_three_prime.svg
# The log has no checksum; it lists an expected `contains` string instead.
- path: output/damageprofiler/test/DamageProfiler.log
contains:
- "FINISHED SUCCESSFULLY"
- path: output/damageprofiler/test/chr22/dmgprof.json
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
- path: output/damageprofiler/test/chr22/DNA_comp_genome.txt
md5sum: fea48af1ecf491b439d36d4a919473df
- path: output/damageprofiler/test/chr22/DNA_composition_sample.txt
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
- path: output/damageprofiler/test/chr22/edit_distance.pdf
- path: output/damageprofiler/test/chr22/edit_distance.svg
- path: output/damageprofiler/test/chr22/editDistance.txt
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
- path: output/damageprofiler/test/chr22/Length_plot_combined_data.svg
- path: output/damageprofiler/test/chr22/Length_plot_forward_reverse_separated.svg
- path: output/damageprofiler/test/chr22/Length_plot.pdf
- path: output/damageprofiler/test/chr22/lgdistribution.txt
md5sum: df2e19195185ea9ee05e8e84b2948f36
- path: output/damageprofiler/test/chr22/misincorporation.txt
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c