mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
fix: reduce number of required input files for damage profiler (#612)
* Reduce number of required input files for damage profiler
* Remove debugging
* Add optional species list file.
* Working pending updated test-dataset update
* Add genome header to config
This commit is contained in:
parent
4e9e732b76
commit
97fe899f79
5 changed files with 144 additions and 35 deletions
|
@ -22,25 +22,30 @@ process DAMAGEPROFILER {
|
||||||
tuple val(meta), path(bam)
|
tuple val(meta), path(bam)
|
||||||
path fasta
|
path fasta
|
||||||
path fai
|
path fai
|
||||||
|
path specieslist
|
||||||
|
|
||||||
output:
|
output:
|
||||||
tuple val(meta), path("${prefix}"), emit: results
|
tuple val(meta), path("${prefix}"), emit: results
|
||||||
path "versions.yml" , emit: versions
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
script:
|
script:
|
||||||
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
def software = getSoftwareName(task.process)
|
||||||
|
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
|
||||||
|
def reference = fasta ? "-r $fasta" : ""
|
||||||
|
def species_list = specieslist ? "-sf $specieslist" : ""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
damageprofiler \\
|
damageprofiler \\
|
||||||
-i $bam \\
|
-i $bam \\
|
||||||
-r $fasta \\
|
-o $prefix/ \\
|
||||||
-o $prefix/ \\
|
$options.args \\
|
||||||
$options.args
|
$reference \\
|
||||||
|
$species_list
|
||||||
|
|
||||||
cat <<-END_VERSIONS > versions.yml
|
cat <<-END_VERSIONS > versions.yml
|
||||||
${getProcessName(task.process)}:
|
${getProcessName(task.process)}:
|
||||||
${getSoftwareName(task.process)}: \$(damageprofiler -v | sed 's/^DamageProfiler v//')
|
${getSoftwareName(task.process)}: \$(damageprofiler -v | sed 's/^DamageProfiler v//')
|
||||||
END_VERSIONS
|
END_VERSIONS
|
||||||
"""
|
"""
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,12 +32,16 @@ input:
|
||||||
pattern: "*.{bam,cram,sam}"
|
pattern: "*.{bam,cram,sam}"
|
||||||
- fasta:
|
- fasta:
|
||||||
type: file
|
type: file
|
||||||
description: FASTA reference file
|
description: OPTIONAL FASTA reference file
|
||||||
pattern: "*.{fasta,fna,fa}"
|
pattern: "*.{fasta,fna,fa}"
|
||||||
- fai:
|
- fai:
|
||||||
type: file
|
type: file
|
||||||
description: FASTA index file from samtools faidx
|
description: OPTIONAL FASTA index file from samtools faidx
|
||||||
pattern: "*.{fai}"
|
pattern: "*.{fai}"
|
||||||
|
- specieslist:
|
||||||
|
type: file
|
||||||
|
description: OPTIONAL text file with list of target reference headers
|
||||||
|
pattern: "*.{txt}"
|
||||||
|
|
||||||
output:
|
output:
|
||||||
- versions:
|
- versions:
|
||||||
|
|
|
@ -104,6 +104,7 @@ params {
|
||||||
genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
|
genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
|
||||||
genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes"
|
genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes"
|
||||||
genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed"
|
genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed"
|
||||||
|
genome_header = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
|
||||||
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
|
genome_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz"
|
||||||
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
genome_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed.gz.tbi"
|
||||||
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
||||||
|
@ -119,18 +120,19 @@ params {
|
||||||
repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json"
|
repeat_expansions = "${test_data_dir}/genomics/homo_sapiens/genome/loci/repeat_expansions.json"
|
||||||
}
|
}
|
||||||
'illumina' {
|
'illumina' {
|
||||||
test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
|
test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
|
||||||
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
|
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
|
||||||
test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
|
test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
|
||||||
test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
|
test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
|
||||||
test_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam"
|
test_paired_end_markduplicates_sorted_referencesn_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt"
|
||||||
test_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai"
|
test_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam"
|
||||||
test_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam"
|
test_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai"
|
||||||
test_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam"
|
test_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam"
|
||||||
test_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam"
|
test_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam"
|
||||||
test_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt"
|
test_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam"
|
||||||
test_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam"
|
test_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt"
|
||||||
test_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam"
|
test_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam"
|
||||||
|
test_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam"
|
||||||
|
|
||||||
test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"
|
test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"
|
||||||
test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai"
|
test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai"
|
||||||
|
|
|
@ -6,10 +6,34 @@ include { DAMAGEPROFILER } from '../../../modules/damageprofiler/main.nf' addPar
|
||||||
|
|
||||||
workflow test_damageprofiler {
|
workflow test_damageprofiler {
|
||||||
|
|
||||||
input = [ [ id:'test', single_end:false ], // meta map
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ]
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true) ] ]
|
||||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
fasta = []
|
||||||
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
fai = []
|
||||||
|
species_list = []
|
||||||
|
|
||||||
DAMAGEPROFILER ( input, fasta, fai )
|
|
||||||
|
DAMAGEPROFILER ( input, fasta, fai, species_list )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_damageprofiler_reference {
|
||||||
|
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true) ] ]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
|
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
|
||||||
|
species_list = []
|
||||||
|
|
||||||
|
DAMAGEPROFILER ( input, fasta, fai, species_list )
|
||||||
|
}
|
||||||
|
|
||||||
|
workflow test_damageprofiler_specieslist {
|
||||||
|
|
||||||
|
input = [ [ id:'test', single_end:false ], // meta map
|
||||||
|
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_markduplicates_sorted_bam'], checkIfExists: true) ] ]
|
||||||
|
fasta = []
|
||||||
|
fai = []
|
||||||
|
species_list = file(params.test_data['homo_sapiens']['genome']['genome_header'], checkIfExists: true)
|
||||||
|
|
||||||
|
DAMAGEPROFILER ( input, fasta, fai, species_list )
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,13 +4,13 @@
|
||||||
- damageprofiler
|
- damageprofiler
|
||||||
files:
|
files:
|
||||||
- path: output/damageprofiler/test/3p_freq_misincorporations.txt
|
- path: output/damageprofiler/test/3p_freq_misincorporations.txt
|
||||||
md5sum: da4cac90c78899a7cb6d72d415392b49
|
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
|
||||||
- path: output/damageprofiler/test/3pGtoA_freq.txt
|
- path: output/damageprofiler/test/3pGtoA_freq.txt
|
||||||
md5sum: 8dab75d51a4b943b501d0995169c767f
|
md5sum: 61c903b1504ed7d7182570dfc75e4498
|
||||||
- path: output/damageprofiler/test/5pCtoT_freq.txt
|
- path: output/damageprofiler/test/5pCtoT_freq.txt
|
||||||
md5sum: fcc48ee5f72edff930d627c8bfdd8a5b
|
md5sum: 15a75b60ee519b61ce04a83fe3afe855
|
||||||
- path: output/damageprofiler/test/5p_freq_misincorporations.txt
|
- path: output/damageprofiler/test/5p_freq_misincorporations.txt
|
||||||
md5sum: 54665474f5ef17dcc268567e5eaa7d86
|
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
|
||||||
- path: output/damageprofiler/test/DamagePlot_five_prime.svg
|
- path: output/damageprofiler/test/DamagePlot_five_prime.svg
|
||||||
- path: output/damageprofiler/test/DamagePlot.pdf
|
- path: output/damageprofiler/test/DamagePlot.pdf
|
||||||
- path: output/damageprofiler/test/DamagePlot_three_prime.svg
|
- path: output/damageprofiler/test/DamagePlot_three_prime.svg
|
||||||
|
@ -18,19 +18,93 @@
|
||||||
contains:
|
contains:
|
||||||
- "FINISHED SUCCESSFULLY"
|
- "FINISHED SUCCESSFULLY"
|
||||||
- path: output/damageprofiler/test/dmgprof.json
|
- path: output/damageprofiler/test/dmgprof.json
|
||||||
md5sum: 98499024c7e937896e481f2d3cfbdd3e
|
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
|
||||||
- path: output/damageprofiler/test/DNA_comp_genome.txt
|
- path: output/damageprofiler/test/DNA_comp_genome.txt
|
||||||
md5sum: f91e70760d91a1193a27e360aaddf2fd
|
md5sum: fea48af1ecf491b439d36d4a919473df
|
||||||
- path: output/damageprofiler/test/DNA_composition_sample.txt
|
- path: output/damageprofiler/test/DNA_composition_sample.txt
|
||||||
md5sum: 1257eb3eb42484647bfba2151f9ef04f
|
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
|
||||||
- path: output/damageprofiler/test/edit_distance.pdf
|
- path: output/damageprofiler/test/edit_distance.pdf
|
||||||
- path: output/damageprofiler/test/edit_distance.svg
|
- path: output/damageprofiler/test/edit_distance.svg
|
||||||
- path: output/damageprofiler/test/editDistance.txt
|
- path: output/damageprofiler/test/editDistance.txt
|
||||||
md5sum: af2d2f4a99058ec56eae88ec27779e38
|
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
|
||||||
- path: output/damageprofiler/test/Length_plot_combined_data.svg
|
- path: output/damageprofiler/test/Length_plot_combined_data.svg
|
||||||
- path: output/damageprofiler/test/Length_plot_forward_reverse_separated.svg
|
- path: output/damageprofiler/test/Length_plot_forward_reverse_separated.svg
|
||||||
- path: output/damageprofiler/test/Length_plot.pdf
|
- path: output/damageprofiler/test/Length_plot.pdf
|
||||||
- path: output/damageprofiler/test/lgdistribution.txt
|
- path: output/damageprofiler/test/lgdistribution.txt
|
||||||
md5sum: c5d029bf3a92b613310ee23f47d94981
|
md5sum: df2e19195185ea9ee05e8e84b2948f36
|
||||||
- path: output/damageprofiler/test/misincorporation.txt
|
- path: output/damageprofiler/test/misincorporation.txt
|
||||||
md5sum: 3aa6dd749010a492d92a815a83c196a8
|
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c
|
||||||
|
|
||||||
|
- name: damageprofiler_reference
|
||||||
|
command: nextflow run ./tests/modules/damageprofiler -entry test_damageprofiler_reference -c tests/config/nextflow.config -dump-channels
|
||||||
|
tags:
|
||||||
|
- damageprofiler
|
||||||
|
files:
|
||||||
|
- path: output/damageprofiler/test/3p_freq_misincorporations.txt
|
||||||
|
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
|
||||||
|
- path: output/damageprofiler/test/3pGtoA_freq.txt
|
||||||
|
md5sum: 61c903b1504ed7d7182570dfc75e4498
|
||||||
|
- path: output/damageprofiler/test/5pCtoT_freq.txt
|
||||||
|
md5sum: 15a75b60ee519b61ce04a83fe3afe855
|
||||||
|
- path: output/damageprofiler/test/5p_freq_misincorporations.txt
|
||||||
|
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
|
||||||
|
- path: output/damageprofiler/test/DamagePlot_five_prime.svg
|
||||||
|
- path: output/damageprofiler/test/DamagePlot.pdf
|
||||||
|
- path: output/damageprofiler/test/DamagePlot_three_prime.svg
|
||||||
|
- path: output/damageprofiler/test/DamageProfiler.log
|
||||||
|
contains:
|
||||||
|
- "FINISHED SUCCESSFULLY"
|
||||||
|
- path: output/damageprofiler/test/dmgprof.json
|
||||||
|
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
|
||||||
|
- path: output/damageprofiler/test/DNA_comp_genome.txt
|
||||||
|
md5sum: fea48af1ecf491b439d36d4a919473df
|
||||||
|
- path: output/damageprofiler/test/DNA_composition_sample.txt
|
||||||
|
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
|
||||||
|
- path: output/damageprofiler/test/edit_distance.pdf
|
||||||
|
- path: output/damageprofiler/test/edit_distance.svg
|
||||||
|
- path: output/damageprofiler/test/editDistance.txt
|
||||||
|
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
|
||||||
|
- path: output/damageprofiler/test/Length_plot_combined_data.svg
|
||||||
|
- path: output/damageprofiler/test/Length_plot_forward_reverse_separated.svg
|
||||||
|
- path: output/damageprofiler/test/Length_plot.pdf
|
||||||
|
- path: output/damageprofiler/test/lgdistribution.txt
|
||||||
|
md5sum: df2e19195185ea9ee05e8e84b2948f36
|
||||||
|
- path: output/damageprofiler/test/misincorporation.txt
|
||||||
|
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c
|
||||||
|
|
||||||
|
- name: damageprofiler_specieslist
|
||||||
|
command: nextflow run ./tests/modules/damageprofiler -entry test_damageprofiler_specieslist -c tests/config/nextflow.config -dump-channels
|
||||||
|
tags:
|
||||||
|
- damageprofiler
|
||||||
|
files:
|
||||||
|
- path: output/damageprofiler/test/chr22/3p_freq_misincorporations.txt
|
||||||
|
md5sum: de3b84d946a6b63cdcfadf82bf6854c0
|
||||||
|
- path: output/damageprofiler/test/chr22/3pGtoA_freq.txt
|
||||||
|
md5sum: 61c903b1504ed7d7182570dfc75e4498
|
||||||
|
- path: output/damageprofiler/test/chr22/5pCtoT_freq.txt
|
||||||
|
md5sum: 15a75b60ee519b61ce04a83fe3afe855
|
||||||
|
- path: output/damageprofiler/test/chr22/5p_freq_misincorporations.txt
|
||||||
|
md5sum: 3b3240d6c1a3491e461b39199a9fcfe3
|
||||||
|
- path: output/damageprofiler/test/chr22/DamagePlot_five_prime.svg
|
||||||
|
- path: output/damageprofiler/test/chr22/DamagePlot.pdf
|
||||||
|
- path: output/damageprofiler/test/chr22/DamagePlot_three_prime.svg
|
||||||
|
- path: output/damageprofiler/test/DamageProfiler.log
|
||||||
|
contains:
|
||||||
|
- "FINISHED SUCCESSFULLY"
|
||||||
|
- path: output/damageprofiler/test/chr22/dmgprof.json
|
||||||
|
md5sum: 2e54e712d2ae9e32c4c298e5fd8f60fe
|
||||||
|
- path: output/damageprofiler/test/chr22/DNA_comp_genome.txt
|
||||||
|
md5sum: fea48af1ecf491b439d36d4a919473df
|
||||||
|
- path: output/damageprofiler/test/chr22/DNA_composition_sample.txt
|
||||||
|
md5sum: 9e17a0b1e5ad4eb13201cd24ad8507dd
|
||||||
|
- path: output/damageprofiler/test/chr22/edit_distance.pdf
|
||||||
|
- path: output/damageprofiler/test/chr22/edit_distance.svg
|
||||||
|
- path: output/damageprofiler/test/chr22/editDistance.txt
|
||||||
|
md5sum: 04d14b449a5afa8b5dbff0dfa762356b
|
||||||
|
- path: output/damageprofiler/test/chr22/Length_plot_combined_data.svg
|
||||||
|
- path: output/damageprofiler/test/chr22/Length_plot_forward_reverse_separated.svg
|
||||||
|
- path: output/damageprofiler/test/chr22/Length_plot.pdf
|
||||||
|
- path: output/damageprofiler/test/chr22/lgdistribution.txt
|
||||||
|
md5sum: df2e19195185ea9ee05e8e84b2948f36
|
||||||
|
- path: output/damageprofiler/test/chr22/misincorporation.txt
|
||||||
|
md5sum: bec0c5fc2fa9c82b04949e2d8b6e979c
|
||||||
|
|
Loading…
Reference in a new issue