mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-18 10:56:13 -05:00
207930139a
* New last/lastal to align query sequences on a target index `lastal` is the main program of the [LAST](https://gitlab.com/mcfrith/last) suite. It align query DNA sequences in FASTA or FASTQ format to a target index of DNA or protein sequences. The index is produced by the `lastdb` program (module `last/lastdb`). The score matrix for evaluating the alignment can be chosen among preset ones or computed iteratively by the `last-train` program (module `last/train`). For this reason, the `last/lastal` module proposed here has one input channel containing an optional file, that has to be dummy when not used. The LAST aligner outputs MAF files that can be very large (up to hundreds of gigabytes), therefore this module unconditionally compresses its output with gzip. This new module is part of the work described in Issue #464. During this development, we fix the version of LAST to 1219 to ensure consistency (hence ignore lint's version warning). * Apply suggestions from code review Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com> * Un-hardcode the path to the LAST index. Among multiple alternatives I have chosen the following command to detect the sample name of the index, because it fails in situations where there is no index files in the index folder, and in situations were there are two indexes files in the folder. Not failing would result in feeding garbage information in the INDEX_NAME variable. basename \$(ls $index/*.bck) .bck In case of missing file, a clear error message is given by `ls`. In case of more than one file, the error message of `basename` is more cryptic, unfortunately. (`basename: extra operand ‘.bck’`) Alternatives that do not fail if there is no .bck file: basename $index/*bck .bck find $index -name '*bck' | sed 's/.bck//' Alternatives that do not fail if there are more than one .bck file: basename -s .bck $index/*bck ls $index/*.bck | xargs basename -s .bck find $index -name '*bck' | sed 's/.bck//' Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
162 lines
16 KiB
Text
162 lines
16 KiB
Text
// Base directory for test data
|
|
def test_data_dir = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data"
|
|
|
|
params {
|
|
test_data {
|
|
'sarscov2' {
|
|
'genome' {
|
|
genome_fasta = "${test_data_dir}/genomics/sarscov2/genome/genome.fasta"
|
|
genome_fasta_fai = "${test_data_dir}/genomics/sarscov2/genome/genome.fasta.fai"
|
|
genome_dict = "${test_data_dir}/genomics/sarscov2/genome/genome.dict"
|
|
genome_gff3 = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3"
|
|
genome_gff3_gz = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3.gz"
|
|
genome_gtf = "${test_data_dir}/genomics/sarscov2/genome/genome.gtf"
|
|
genome_sizes = "${test_data_dir}/genomics/sarscov2/genome/genome.sizes"
|
|
transcriptome_fasta = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.fasta"
|
|
transcriptome_paf = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.paf"
|
|
|
|
test_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed"
|
|
test_bed_gz = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed.gz"
|
|
test2_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/test2.bed"
|
|
test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12"
|
|
baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed"
|
|
|
|
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
|
|
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
|
|
|
|
all_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/all_sites.fas"
|
|
informative_sites_fas = "${test_data_dir}/genomics/sarscov2/genome/alignment/informative_sites.fas"
|
|
|
|
contigs_genome_maf_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz"
|
|
contigs_genome_par = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/contigs.genome.par"
|
|
lastdb_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz"
|
|
}
|
|
'illumina' {
|
|
test_single_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.single_end.bam"
|
|
test_single_end_sorted_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam"
|
|
test_single_end_sorted_bam_bai = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai"
|
|
test_paired_end_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.bam"
|
|
test_paired_end_sorted_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam"
|
|
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai"
|
|
test_paired_end_methylated_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.methylated.bam"
|
|
test_paired_end_methylated_sorted_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam"
|
|
test_paired_end_methylated_sorted_bam_bai = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai"
|
|
test_unaligned_bam = "${test_data_dir}/genomics/sarscov2/illumina/bam/test.unaligned.bam"
|
|
|
|
test_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test_1.fastq.gz"
|
|
test_2_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test_2.fastq.gz"
|
|
test2_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz"
|
|
test2_2_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz"
|
|
test_methylated_1_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test.methylated_1.fastq.gz"
|
|
test_methylated_2_fastq_gz = "${test_data_dir}/genomics/sarscov2/illumina/fastq/test.methylated_2.fastq.gz"
|
|
|
|
test_bedgraph = "${test_data_dir}/genomics/sarscov2/illumina/bedgraph/test.bedgraph"
|
|
|
|
test_bigwig = "${test_data_dir}/genomics/sarscov2/illumina/bigwig/test.bigwig"
|
|
|
|
test_wig_gz = "${test_data_dir}/genomics/sarscov2/illumina/wig/test.wig.gz"
|
|
|
|
test_baserecalibrator_table = "${test_data_dir}/genomics/sarscov2/illumina/gatk/test.baserecalibrator.table"
|
|
|
|
test_computematrix_mat_gz = "${test_data_dir}/genomics/sarscov2/illumina/deeptools/test.computeMatrix.mat.gz"
|
|
|
|
test_vcf = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test.vcf"
|
|
test_vcf_gz = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test.vcf.gz"
|
|
test_vcf_gz_tbi = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi"
|
|
test2_vcf = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test2.vcf"
|
|
test2_vcf_gz = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test2.vcf.gz"
|
|
test2_vcf_gz_tbi = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi"
|
|
test3_vcf = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test3.vcf"
|
|
test3_vcf_gz = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test3.vcf.gz"
|
|
test3_vcf_gz_tbi = "${test_data_dir}/genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi"
|
|
|
|
contigs_fasta = "${test_data_dir}/genomics/sarscov2/illumina/fasta/contigs.fasta"
|
|
scaffolds_fasta = "${test_data_dir}/genomics/sarscov2/illumina/fasta/scaffolds.fasta"
|
|
|
|
assembly_gfa = "${test_data_dir}/genomics/sarscov2/illumina/gfa/assembly.gfa"
|
|
}
|
|
'nanopore' {
|
|
test_sorted_bam = "${test_data_dir}/genomics/sarscov2/nanopore/bam/test.sorted.bam"
|
|
test_sorted_bam_bai = "${test_data_dir}/genomics/sarscov2/nanopore/bam/test.sorted.bam.bai"
|
|
|
|
fast5_tar_gz = "${test_data_dir}/genomics/sarscov2/nanopore/fast5/fast5.tar.gz"
|
|
|
|
test_fastq_gz = "${test_data_dir}/genomics/sarscov2/nanopore/fastq/test.fastq.gz"
|
|
|
|
test_sequencing_summary = "${test_data_dir}/genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt"
|
|
}
|
|
}
|
|
'homo_sapiens' {
|
|
'genome' {
|
|
genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta"
|
|
genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai"
|
|
genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
|
|
genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3"
|
|
genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
|
|
genome_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes"
|
|
genome_bed = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed"
|
|
transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta"
|
|
|
|
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
|
dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
|
|
gnomad_r2_1_1_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
|
|
gnomad_r2_1_1_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi"
|
|
mills_and_1000g_indels_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
|
|
mills_and_1000g_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi"
|
|
index_salmon = "${test_data_dir}/genomics/homo_sapiens/genome/index/salmon"
|
|
}
|
|
'illumina' {
|
|
test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
|
|
test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
|
|
test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
|
|
test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
|
|
test_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam"
|
|
test_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai"
|
|
test_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam"
|
|
test_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam"
|
|
test_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam"
|
|
test_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt"
|
|
test_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam"
|
|
test_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam"
|
|
|
|
test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"
|
|
test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai"
|
|
test2_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam"
|
|
test2_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai"
|
|
test2_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam"
|
|
test2_paired_end_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai"
|
|
test2_paired_end_umi_consensus_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam"
|
|
test2_paired_end_umi_converted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam"
|
|
test2_paired_end_umi_grouped_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam"
|
|
test2_paired_end_umi_histogram_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt"
|
|
test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam"
|
|
test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam"
|
|
|
|
test_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz"
|
|
test_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz"
|
|
test_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz"
|
|
test_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz"
|
|
test2_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz"
|
|
test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz"
|
|
test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz"
|
|
test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz"
|
|
|
|
test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table"
|
|
test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table"
|
|
|
|
test_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf"
|
|
test_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz"
|
|
test_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi"
|
|
test_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx"
|
|
|
|
test2_genome_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf"
|
|
test2_genome_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz"
|
|
test2_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi"
|
|
test2_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx"
|
|
|
|
test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_1.fastq.gz"
|
|
test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_2.fastq.gz"
|
|
}
|
|
}
|
|
}
|
|
}
|