diff --git a/software/bowtie2/test/indices/E_coli/E_coli.1.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.1.bt2 new file mode 100644 index 00000000..03defbe6 Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.1.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/E_coli.2.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.2.bt2 new file mode 100644 index 00000000..90cdc20f Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.2.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/E_coli.3.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.3.bt2 new file mode 100644 index 00000000..171a3625 Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.3.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/E_coli.4.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.4.bt2 new file mode 100644 index 00000000..b2dc290b Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.4.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2 new file mode 100644 index 00000000..9fa63794 Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.rev.1.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2 b/software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2 new file mode 100644 index 00000000..a78402f5 Binary files /dev/null and b/software/bowtie2/test/indices/E_coli/E_coli.rev.2.bt2 differ diff --git a/software/bowtie2/test/indices/E_coli/NC_010473.fa b/software/bowtie2/test/indices/E_coli/NC_010473.fa new file mode 120000 index 00000000..d2298ff8 --- /dev/null +++ b/software/bowtie2/test/indices/E_coli/NC_010473.fa @@ -0,0 +1 @@ +../../../../../tests/data/fasta/E_coli/NC_010473.fa \ No newline at end of file diff --git a/software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz b/software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz new file mode 120000 index 00000000..c325c16c --- 
/dev/null +++ b/software/bowtie2/test/input/Ecoli_DNA_R1.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz \ No newline at end of file diff --git a/software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz b/software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz new file mode 120000 index 00000000..2d974f67 --- /dev/null +++ b/software/bowtie2/test/input/Ecoli_DNA_R2.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz \ No newline at end of file diff --git a/software/bowtie2/test/input/test_R1_val_1.fq.gz b/software/bowtie2/test/input/test_R1_val_1.fq.gz new file mode 120000 index 00000000..88ccdc87 --- /dev/null +++ b/software/bowtie2/test/input/test_R1_val_1.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz \ No newline at end of file diff --git a/software/bowtie2/test/input/test_R2_val_2.fq.gz b/software/bowtie2/test/input/test_R2_val_2.fq.gz new file mode 120000 index 00000000..440be644 --- /dev/null +++ b/software/bowtie2/test/input/test_R2_val_2.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz \ No newline at end of file diff --git a/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam b/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam new file mode 100644 index 00000000..dfaa3e54 Binary files /dev/null and b/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2.bam differ diff --git a/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2_stats.txt b/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2_stats.txt new file mode 100644 index 00000000..bc6ab152 --- /dev/null +++ b/software/bowtie2/test/output/Ecoli_DNA_R_E_coli_bowtie2_stats.txt @@ -0,0 +1,15 @@ +10000 reads; of these: + 10000 (100.00%) were paired; of these: + 893 (8.93%) aligned concordantly 0 times + 8474 (84.74%) aligned concordantly exactly 1 time + 633 (6.33%) aligned concordantly >1 times + ---- + 893 pairs aligned concordantly 0 times; of these: + 815 (91.27%) aligned 
discordantly 1 time + ---- + 78 pairs aligned 0 times concordantly or discordantly; of these: + 156 mates make up the pairs; of these: + 0 (0.00%) aligned 0 times + 1 (0.64%) aligned exactly 1 time + 155 (99.36%) aligned >1 times +100.00% overall alignment rate diff --git a/software/bowtie2/test/output/test_GRCm38_bowtie2.bam b/software/bowtie2/test/output/test_GRCm38_bowtie2.bam new file mode 100644 index 00000000..2177aef9 Binary files /dev/null and b/software/bowtie2/test/output/test_GRCm38_bowtie2.bam differ diff --git a/software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt b/software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt new file mode 100644 index 00000000..38a6ca98 --- /dev/null +++ b/software/bowtie2/test/output/test_GRCm38_bowtie2_stats.txt @@ -0,0 +1,15 @@ +9979 reads; of these: + 9979 (100.00%) were paired; of these: + 3584 (35.92%) aligned concordantly 0 times + 3705 (37.13%) aligned concordantly exactly 1 time + 2690 (26.96%) aligned concordantly >1 times + ---- + 3584 pairs aligned concordantly 0 times; of these: + 886 (24.72%) aligned discordantly 1 time + ---- + 2698 pairs aligned 0 times concordantly or discordantly; of these: + 5396 mates make up the pairs; of these: + 2282 (42.29%) aligned 0 times + 1467 (27.19%) aligned exactly 1 time + 1647 (30.52%) aligned >1 times +88.57% overall alignment rate diff --git a/software/fastq_screen/test/input/test_R1.fastq.gz b/software/fastq_screen/test/input/test_R1.fastq.gz new file mode 120000 index 00000000..e7b4b614 --- /dev/null +++ b/software/fastq_screen/test/input/test_R1.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1.fastq.gz \ No newline at end of file diff --git a/software/fastq_screen/test/input/test_R1_val_1.fq.gz b/software/fastq_screen/test/input/test_R1_val_1.fq.gz new file mode 120000 index 00000000..88ccdc87 --- /dev/null +++ b/software/fastq_screen/test/input/test_R1_val_1.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz \ No newline 
at end of file diff --git a/software/fastq_screen/test/input/test_R2.fastq.gz b/software/fastq_screen/test/input/test_R2.fastq.gz new file mode 120000 index 00000000..5b7b57a3 --- /dev/null +++ b/software/fastq_screen/test/input/test_R2.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2.fastq.gz \ No newline at end of file diff --git a/software/fastq_screen/test/input/test_R2_val_2.fq.gz b/software/fastq_screen/test/input/test_R2_val_2.fq.gz new file mode 120000 index 00000000..440be644 --- /dev/null +++ b/software/fastq_screen/test/input/test_R2_val_2.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz \ No newline at end of file diff --git a/software/fastq_screen/test/input/test_single_end.fastq.gz b/software/fastq_screen/test/input/test_single_end.fastq.gz new file mode 120000 index 00000000..b1f79001 --- /dev/null +++ b/software/fastq_screen/test/input/test_single_end.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_single_end.fastq.gz \ No newline at end of file diff --git a/software/fastq_screen/test/output/test_R1_screen.txt b/software/fastq_screen/test/output/test_R1_screen.txt new file mode 100644 index 00000000..4a4f3890 --- /dev/null +++ b/software/fastq_screen/test/output/test_R1_screen.txt @@ -0,0 +1,31 @@ +#Fastq_screen version: 0.14.0 #Aligner: bowtie2 #Reads in subset: 100000 +Genome #Reads_processed #Unmapped %Unmapped #One_hit_one_genome %One_hit_one_genome #Multiple_hits_one_genome %Multiple_hits_one_genome #One_hit_multiple_genomes %One_hit_multiple_genomes Multiple_hits_multiple_genomes %Multiple_hits_multiple_genomes +Cat 10000 9171 91.71 0 0.00 0 0.00 421 4.21 408 4.08 +Chicken 10000 8932 89.32 0 0.00 0 0.00 64 0.64 1004 10.04 +Cow 10000 8484 84.84 0 0.00 0 0.00 294 2.94 1222 12.22 +Drosophila 10000 9469 94.69 0 0.00 0 0.00 19 0.19 512 5.12 +Human 10000 8367 83.67 2 0.02 3 0.03 354 3.54 1274 12.74 +Mouse 10000 122 1.22 3265 32.65 869 8.69 2066 20.66 3678 36.78 +Pig 10000 8459 84.59 0 0.00 0 
0.00 334 3.34 1207 12.07 +Rat 10000 6432 64.32 1 0.01 3 0.03 1334 13.34 2230 22.30 +Zebrafish 10000 9125 91.25 0 0.00 0 0.00 41 0.41 834 8.34 +Arabidopsis 10000 9497 94.97 0 0.00 0 0.00 5 0.05 498 4.98 +Grape 10000 9600 96.00 0 0.00 1 0.01 82 0.82 317 3.17 +Potato 10000 9460 94.60 0 0.00 0 0.00 12 0.12 528 5.28 +Tomato 10000 9521 95.21 0 0.00 0 0.00 45 0.45 434 4.34 +Adapters 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00 +Brachybacterium 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00 +Pseudomonas 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00 +Massilia_oculi 10000 9999 99.99 0 0.00 1 0.01 0 0.00 0 0.00 +Ecoli 10000 9998 99.98 1 0.01 1 0.01 0 0.00 0 0.00 +Lambda 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00 +MT 10000 7856 78.56 0 0.00 0 0.00 2034 20.34 110 1.10 +PhiX 10000 10000 100.00 0 0.00 0 0.00 0 0.00 0 0.00 +rRNA 10000 9157 91.57 0 0.00 0 0.00 111 1.11 732 7.32 +Wasp 10000 9473 94.73 0 0.00 0 0.00 211 2.11 316 3.16 +Vectors 10000 9713 97.13 0 0.00 0 0.00 52 0.52 235 2.35 +Worm 10000 9645 96.45 0 0.00 0 0.00 13 0.13 342 3.42 +Yeast 10000 9507 95.07 0 0.00 0 0.00 4 0.04 489 4.89 +Mycoplasma 10000 9998 99.98 0 0.00 0 0.00 0 0.00 2 0.02 + +%Hit_no_genomes: 0.88 diff --git a/software/fastqc/test/input/test_R1_val_1.fq.gz b/software/fastqc/test/input/test_R1_val_1.fq.gz new file mode 120000 index 00000000..88ccdc87 --- /dev/null +++ b/software/fastqc/test/input/test_R1_val_1.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz \ No newline at end of file diff --git a/software/fastqc/test/input/test_R2_val_2.fq.gz b/software/fastqc/test/input/test_R2_val_2.fq.gz new file mode 120000 index 00000000..440be644 --- /dev/null +++ b/software/fastqc/test/input/test_R2_val_2.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz \ No newline at end of file diff --git a/software/fastqc/test/output/test_R1_fastqc.html b/software/fastqc/test/output/test_R1_fastqc.html new file mode 100644 index 00000000..f81fd4f4 --- /dev/null +++ 
b/software/fastqc/test/output/test_R1_fastqc.html @@ -0,0 +1,187 @@ +test_R1.fastq.gz FastQC Report
FastQCFastQC Report
Tue 17 Mar 2020
test_R1.fastq.gz

Summary

[OK]Basic Statistics

MeasureValue
Filenametest_R1.fastq.gz
File typeConventional base calls
EncodingSanger / Illumina 1.9
Total Sequences10000
Sequences flagged as poor quality0
Sequence length76
%GC44

[OK]Per base sequence quality

Per base quality graph

[OK]Per tile sequence quality

Per tile quality graph

[OK]Per sequence quality scores

Per Sequence quality graph

[FAIL]Per base sequence content

Per base sequence content

[OK]Per sequence GC content

Per sequence GC content graph

[OK]Per base N content

N content graph

[OK]Sequence Length Distribution

Sequence length distribution

[OK]Sequence Duplication Levels

Duplication level graph

[WARN]Overrepresented sequences

SequenceCountPercentagePossible Source
GTATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT230.22999999999999998No Hit
GGTATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT160.16No Hit
TATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT150.15No Hit

[OK]Adapter Content

Adapter graph

\ No newline at end of file diff --git a/software/fastqc/test/output/test_R1_fastqc.zip b/software/fastqc/test/output/test_R1_fastqc.zip new file mode 100644 index 00000000..05358336 Binary files /dev/null and b/software/fastqc/test/output/test_R1_fastqc.zip differ diff --git a/software/fastqc/test/output/test_R1_val_1_fastqc.html b/software/fastqc/test/output/test_R1_val_1_fastqc.html new file mode 100644 index 00000000..45c60031 --- /dev/null +++ b/software/fastqc/test/output/test_R1_val_1_fastqc.html @@ -0,0 +1,187 @@ +test_R1_val_1.fq.gz FastQC Report
FastQCFastQC Report
Tue 17 Mar 2020
test_R1_val_1.fq.gz

Summary

[OK]Basic Statistics

MeasureValue
Filenametest_R1_val_1.fq.gz
File typeConventional base calls
EncodingSanger / Illumina 1.9
Total Sequences9979
Sequences flagged as poor quality0
Sequence length20-76
%GC44

[OK]Per base sequence quality

Per base quality graph

[OK]Per tile sequence quality

Per tile quality graph

[OK]Per sequence quality scores

Per Sequence quality graph

[FAIL]Per base sequence content

Per base sequence content

[WARN]Per sequence GC content

Per sequence GC content graph

[OK]Per base N content

N content graph

[WARN]Sequence Length Distribution

Sequence length distribution

[OK]Sequence Duplication Levels

Duplication level graph

[OK]Overrepresented sequences

No overrepresented sequences

[OK]Adapter Content

Adapter graph

\ No newline at end of file diff --git a/software/fastqc/test/output/test_R1_val_1_fastqc.zip b/software/fastqc/test/output/test_R1_val_1_fastqc.zip new file mode 100644 index 00000000..f59827d2 Binary files /dev/null and b/software/fastqc/test/output/test_R1_val_1_fastqc.zip differ diff --git a/software/fastqc/test/output/test_R2_fastqc.html b/software/fastqc/test/output/test_R2_fastqc.html new file mode 100644 index 00000000..ff3435d8 --- /dev/null +++ b/software/fastqc/test/output/test_R2_fastqc.html @@ -0,0 +1,187 @@ +test_R2.fastq.gz FastQC Report
FastQCFastQC Report
Tue 17 Mar 2020
test_R2.fastq.gz

Summary

[OK]Basic Statistics

MeasureValue
Filenametest_R2.fastq.gz
File typeConventional base calls
EncodingSanger / Illumina 1.9
Total Sequences10000
Sequences flagged as poor quality0
Sequence length76
%GC44

[OK]Per base sequence quality

Per base quality graph

[OK]Per tile sequence quality

Per tile quality graph

[OK]Per sequence quality scores

Per Sequence quality graph

[FAIL]Per base sequence content

Per base sequence content

[OK]Per sequence GC content

Per sequence GC content graph

[OK]Per base N content

N content graph

[OK]Sequence Length Distribution

Sequence length distribution

[OK]Sequence Duplication Levels

Duplication level graph

[WARN]Overrepresented sequences

SequenceCountPercentagePossible Source
GTATCAACGCAGAGTACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT190.19No Hit

[OK]Adapter Content

Adapter graph

\ No newline at end of file diff --git a/software/fastqc/test/output/test_R2_fastqc.zip b/software/fastqc/test/output/test_R2_fastqc.zip new file mode 100644 index 00000000..dcfa2eab Binary files /dev/null and b/software/fastqc/test/output/test_R2_fastqc.zip differ diff --git a/software/fastqc/test/output/test_R2_val_2_fastqc.html b/software/fastqc/test/output/test_R2_val_2_fastqc.html new file mode 100644 index 00000000..1dc83b81 --- /dev/null +++ b/software/fastqc/test/output/test_R2_val_2_fastqc.html @@ -0,0 +1,187 @@ +test_R2_val_2.fq.gz FastQC Report
FastQCFastQC Report
Tue 17 Mar 2020
test_R2_val_2.fq.gz

Summary

[OK]Basic Statistics

MeasureValue
Filenametest_R2_val_2.fq.gz
File typeConventional base calls
EncodingSanger / Illumina 1.9
Total Sequences9979
Sequences flagged as poor quality0
Sequence length20-76
%GC44

[OK]Per base sequence quality

Per base quality graph

[OK]Per tile sequence quality

Per tile quality graph

[OK]Per sequence quality scores

Per Sequence quality graph

[FAIL]Per base sequence content

Per base sequence content

[OK]Per sequence GC content

Per sequence GC content graph

[OK]Per base N content

N content graph

[WARN]Sequence Length Distribution

Sequence length distribution

[OK]Sequence Duplication Levels

Duplication level graph

[OK]Overrepresented sequences

No overrepresented sequences

[OK]Adapter Content

Adapter graph

\ No newline at end of file diff --git a/software/fastqc/test/output/test_R2_val_2_fastqc.zip b/software/fastqc/test/output/test_R2_val_2_fastqc.zip new file mode 100644 index 00000000..d6eb7389 Binary files /dev/null and b/software/fastqc/test/output/test_R2_val_2_fastqc.zip differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.1.ht2 b/software/hisat2/test/indices/E_coli/E_coli.1.ht2 new file mode 100644 index 00000000..21db313f Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.1.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.2.ht2 b/software/hisat2/test/indices/E_coli/E_coli.2.ht2 new file mode 100644 index 00000000..90cdc20f Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.2.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.3.ht2 b/software/hisat2/test/indices/E_coli/E_coli.3.ht2 new file mode 100644 index 00000000..171a3625 Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.3.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.4.ht2 b/software/hisat2/test/indices/E_coli/E_coli.4.ht2 new file mode 100644 index 00000000..b2dc290b Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.4.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.5.ht2 b/software/hisat2/test/indices/E_coli/E_coli.5.ht2 new file mode 100644 index 00000000..e49b8cc0 Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.5.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.6.ht2 b/software/hisat2/test/indices/E_coli/E_coli.6.ht2 new file mode 100644 index 00000000..8ed6edd4 Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.6.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/E_coli.7.ht2 b/software/hisat2/test/indices/E_coli/E_coli.7.ht2 new file mode 100644 index 00000000..32354e90 Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.7.ht2 differ diff 
--git a/software/hisat2/test/indices/E_coli/E_coli.8.ht2 b/software/hisat2/test/indices/E_coli/E_coli.8.ht2 new file mode 100644 index 00000000..20d5cb86 Binary files /dev/null and b/software/hisat2/test/indices/E_coli/E_coli.8.ht2 differ diff --git a/software/hisat2/test/indices/E_coli/NC_010473.fa b/software/hisat2/test/indices/E_coli/NC_010473.fa new file mode 120000 index 00000000..d2298ff8 --- /dev/null +++ b/software/hisat2/test/indices/E_coli/NC_010473.fa @@ -0,0 +1 @@ +../../../../../tests/data/fasta/E_coli/NC_010473.fa \ No newline at end of file diff --git a/software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz b/software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz new file mode 120000 index 00000000..c325c16c --- /dev/null +++ b/software/hisat2/test/input/Ecoli_DNA_R1.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/dna/Ecoli_DNA_R1.fastq.gz \ No newline at end of file diff --git a/software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz b/software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz new file mode 120000 index 00000000..2d974f67 --- /dev/null +++ b/software/hisat2/test/input/Ecoli_DNA_R2.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/dna/Ecoli_DNA_R2.fastq.gz \ No newline at end of file diff --git a/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam b/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam new file mode 100644 index 00000000..a7a891a8 Binary files /dev/null and b/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2.bam differ diff --git a/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt b/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt new file mode 100644 index 00000000..2752674a --- /dev/null +++ b/software/hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt @@ -0,0 +1,6 @@ +10000 reads; of these: + 10000 (100.00%) were paired; of these: + 823 (8.23%) aligned concordantly 0 times + 8583 (85.83%) aligned concordantly exactly 1 time + 594 (5.94%) aligned concordantly >1 times +91.77% overall 
alignment rate diff --git a/software/multiqc/test/input/bowtie2/test_GRCm38_bowtie2_stats.txt b/software/multiqc/test/input/bowtie2/test_GRCm38_bowtie2_stats.txt new file mode 120000 index 00000000..b8967087 --- /dev/null +++ b/software/multiqc/test/input/bowtie2/test_GRCm38_bowtie2_stats.txt @@ -0,0 +1 @@ +../../../../bowtie2/test/output/test_GRCm38_bowtie2_stats.txt \ No newline at end of file diff --git a/software/multiqc/test/input/fastq_screen/test_R1_screen.txt b/software/multiqc/test/input/fastq_screen/test_R1_screen.txt new file mode 120000 index 00000000..6ca7bf7e --- /dev/null +++ b/software/multiqc/test/input/fastq_screen/test_R1_screen.txt @@ -0,0 +1 @@ +../../../../fastq_screen/test/output/test_R1_screen.txt \ No newline at end of file diff --git a/software/multiqc/test/input/fastqc/test_R1_fastqc.zip b/software/multiqc/test/input/fastqc/test_R1_fastqc.zip new file mode 120000 index 00000000..0d58f91a --- /dev/null +++ b/software/multiqc/test/input/fastqc/test_R1_fastqc.zip @@ -0,0 +1 @@ +../../../../fastqc/test/output/test_R1_fastqc.zip \ No newline at end of file diff --git a/software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip b/software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip new file mode 120000 index 00000000..cd7c4d8b --- /dev/null +++ b/software/multiqc/test/input/fastqc/test_R1_val_1_fastqc.zip @@ -0,0 +1 @@ +../../../../fastqc/test/output/test_R1_val_1_fastqc.zip \ No newline at end of file diff --git a/software/multiqc/test/input/fastqc/test_R2_fastqc.zip b/software/multiqc/test/input/fastqc/test_R2_fastqc.zip new file mode 120000 index 00000000..abae135e --- /dev/null +++ b/software/multiqc/test/input/fastqc/test_R2_fastqc.zip @@ -0,0 +1 @@ +../../../../fastqc/test/output/test_R2_fastqc.zip \ No newline at end of file diff --git a/software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip b/software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip new file mode 120000 index 00000000..ce42f0d9 --- /dev/null +++ 
b/software/multiqc/test/input/fastqc/test_R2_val_2_fastqc.zip @@ -0,0 +1 @@ +../../../../fastqc/test/output/test_R2_val_2_fastqc.zip \ No newline at end of file diff --git a/software/multiqc/test/input/hisat2/Ecoli_DNA_R_E_coli_hisat2_stats.txt b/software/multiqc/test/input/hisat2/Ecoli_DNA_R_E_coli_hisat2_stats.txt new file mode 120000 index 00000000..a1c92656 --- /dev/null +++ b/software/multiqc/test/input/hisat2/Ecoli_DNA_R_E_coli_hisat2_stats.txt @@ -0,0 +1 @@ +../../../../hisat2/test/output/Ecoli_DNA_R_E_coli_hisat2_stats.txt \ No newline at end of file diff --git a/software/multiqc/test/input/trim_galore/test_R1.fastq.gz_trimming_report.txt b/software/multiqc/test/input/trim_galore/test_R1.fastq.gz_trimming_report.txt new file mode 120000 index 00000000..69891ace --- /dev/null +++ b/software/multiqc/test/input/trim_galore/test_R1.fastq.gz_trimming_report.txt @@ -0,0 +1 @@ +../../../../trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt \ No newline at end of file diff --git a/software/multiqc/test/input/trim_galore/test_R2.fastq.gz_trimming_report.txt b/software/multiqc/test/input/trim_galore/test_R2.fastq.gz_trimming_report.txt new file mode 120000 index 00000000..3f6cee73 --- /dev/null +++ b/software/multiqc/test/input/trim_galore/test_R2.fastq.gz_trimming_report.txt @@ -0,0 +1 @@ +../../../../trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt \ No newline at end of file diff --git a/software/multiqc/test/output/multiqc_report.html b/software/multiqc/test/output/multiqc_report.html new file mode 100644 index 00000000..3db17237 --- /dev/null +++ b/software/multiqc/test/output/multiqc_report.html @@ -0,0 +1,6244 @@ + + + + + + + + + + + + + +MultiQC Report + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

+ + + + + + +

+ +

Loading report..

+ +
+ +
+
+ + + +
+ + + + +
+ + + + +
+

+ + Highlight Samples +

+ +
+ + + +
+

+ Regex mode off + + +

+
    +
    + + +
    +

    + + Rename Samples +

    + +
    + + + +
    +

    Click here for bulk input.

    +
    +

    Paste two columns of a tab-delimited table here (eg. from Excel).

    +

    First column should be the old name, second column the new name.

    +
    + + +
    +
    +

    + Regex mode off + + +

    +
      +
      + + +
      +

      + + Show / Hide Samples +

      + +
      +
      + +
      +
      + +
      +
      + + +
      +
      + +

      + Regex mode off + + +

      +
        +
        + + +
        +

        Export Plots

        +
        + +
        +
        +
        +
        +
        + + px +
        +
        +
        +
        + + px +
        +
        +
        +
        +
        + +
        +
        + +
        +
        +
        +
        + +
        +
        +
        + + X +
        +
        +
        +
        + +
        +

        Download the raw data used to create the plots in this report below:

        +
        +
        + +
        +
        + +
        +
        + +

        Note that additional data was saved in multiqc_data when this report was generated.

        + +
        +
        +
        + +
        +
        Choose Plots
        + + +
        + +
        + +

        If you use plots from MultiQC in a publication or presentation, please cite:

        +
        + MultiQC: Summarize analysis results for multiple tools and samples in a single report
        + Philip Ewels, Måns Magnusson, Sverker Lundin and Max Käller
        + Bioinformatics (2016)
        + doi: 10.1093/bioinformatics/btw354
        + PMID: 27312411 +
        +
        +
        + + +
        +

        Save Settings

        +

        You can save the toolbox settings for this report to the browser.

        +
        + + +
        +
        + +

        Load Settings

        +

        Choose a saved report profile from the dropdown box below:

        +
        +
        + +
        +
        + + + + +
        +
        +
        + + +
        +

        About MultiQC

        +

        This report was generated using MultiQC, version 1.7

        +

        You can see a YouTube video describing how to use MultiQC reports here: + https://youtu.be/qPbIlO_KWN0

        +

        For more information about MultiQC, including other videos and + extensive documentation, please visit http://multiqc.info

        +

        You can report bugs, suggest improvements and find the source code for MultiQC on GitHub: + https://github.com/ewels/MultiQC

        +

        MultiQC is published in Bioinformatics:

        +
        + MultiQC: Summarize analysis results for multiple tools and samples in a single report
        + Philip Ewels, Måns Magnusson, Sverker Lundin and Max Käller
        + Bioinformatics (2016)
        + doi: 10.1093/bioinformatics/btw354
        + PMID: 27312411 +
        +
        + +
        + +
        + + +
        + + + +

        + + + + +

        + + + +

        + A modular tool to aggregate results from bioinformatics analyses across many samples into a single report. +

        + + + + + + + + + + +
        +

        Report generated on 2020-03-18, 10:46 based on data in: + /bi/home/fkrueger/VersionControl/nf-core-modules/tools/multiqc/test/work/52/07836c4fe43e822e375798bf42c0e4 + +

        + + + +
        + + + + + + + + +
        +

        General Statistics

        + + + + + + + + + + Showing 5/5 rows and 5/7 columns. + +
        +
        + +
        Sample Name% Aligned% Trimmed% Dups% GCM Seqs
        test_GRCm38_bowtie2_stats
        88.6%
        test_R1
        1.5%
        8.3%
        44%
        0.0
        test_R1_val_1
        7.0%
        44%
        0.0
        test_R2
        1.9%
        8.4%
        44%
        0.0
        test_R2_val_2
        7.2%
        44%
        0.0
        + + +
        + + + + + + +
        +

        Bowtie 2

        +

        Bowtie 2 is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences.

        + + + + + + +
        + +

        This plot shows the number of reads aligning to the reference in different ways.
        Please note that single mate alignment counts are halved to tally with pair counts properly.

        + + +
        +

        There are 6 possible types of alignment: +* PE mapped uniquely: Pair has only one occurence in the reference genome. +* PE mapped discordantly uniquely: Pair has only one occurence but not in proper pair. +* PE one mate mapped uniquely: One read of a pair has one occurence. +* PE multimapped: Pair has multiple occurence. +* PE one mate multimapped: One read of a pair has multiple occurence. +* PE neither mate aligned: Pair has no occurence.

        +
        + +
        + + +
        +
        loading..
        +
        + + +
        + + +
        +
        + + + +
        +

        Cutadapt

        +

        Cutadapt is a tool to find and remove adapter sequences, primers, poly-Atails and other types of unwanted sequence from your high-throughput sequencing reads.

        + + + + +
        + +

        This plot shows the number of reads with certain lengths of adapter trimmed. + Obs/Exp shows the raw counts divided by the number expected due to sequencing errors. A defined peak + may be related to adapter length. See the + cutadapt documentation + for more information on how these numbers are generated.

        + + +
        + + +
        + +
        loading..
        +
        + + +
        + + +
        +
        + + + +
        +

        FastQ Screen

        +

        FastQ Screen allows you to screen a library of sequences in FastQ format against a set of sequence databases so you can see if the composition of the library matches with what you expect.

        + + + + +
        + + + + +
        +
        + + +
        + + +
        +
        + + + +
        +

        FastQC

        +

        FastQC is a quality control tool for high throughput sequence data, written by Simon Andrews at the Babraham Institute in Cambridge.

        + + + + +
        + +

        + Sequence Counts + + + +

        + +

        Sequence counts for each sample. Duplicate read counts are an estimate only.

        + + +
        +

        This plot show the total number of reads, broken down into unique and duplicate +if possible (only more recent versions of FastQC give duplicate info).

        +

        You can read more about duplicate calculation in the +FastQC documentation. +A small part has been copied here for convenience:

        +

        Only sequences which first appear in the first 100,000 sequences +in each file are analysed. This should be enough to get a good impression +for the duplication levels in the whole file. Each sequence is tracked to +the end of the file to give a representative count of the overall duplication level.

        +

        The duplication detection requires an exact sequence match over the whole length of +the sequence. Any reads over 75bp in length are truncated to 50bp for this analysis.

        +
        + +
        + + +
        +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Sequence Quality Histograms + + + +

        + +

        The mean quality value across each base position in the read.

        + + +
        +

        To enable multiple samples to be plotted on the same graph, only the mean quality +scores are plotted (unlike the box plots seen in FastQC reports).

        +

        Taken from the FastQC help:

        +

        The y-axis on the graph shows the quality scores. The higher the score, the better +the base call. The background of the graph divides the y axis into very good quality +calls (green), calls of reasonable quality (orange), and calls of poor quality (red). +The quality of calls on most platforms will degrade as the run progresses, so it is +common to see base calls falling into the orange area towards the end of a read.

        +
        + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Per Sequence Quality Scores + + + +

        + +

        The number of reads with average quality scores. Shows if a subset of reads has poor quality.

        + + +
        +

        From the FastQC help:

        +

        The per sequence quality score report allows you to see if a subset of your +sequences have universally low quality values. It is often the case that a +subset of sequences will have universally poor quality, however these should +represent only a small percentage of the total sequences.

        +
        + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Per Base Sequence Content + + + +

        + +

        The proportion of each base position for which each of the four normal DNA bases has been called.

        + + +
        +

        To enable multiple samples to be shown in a single plot, the base composition data +is shown as a heatmap. The colours represent the balance between the four bases: +an even distribution should give an even muddy brown colour. Hover over the plot +to see the percentage of the four bases under the cursor.

        +

        To see the data as a line plot, as in the original FastQC graph, click on a sample track.

        +

        From the FastQC help:

        +

        Per Base Sequence Content plots out the proportion of each base position in a +file for which each of the four normal DNA bases has been called.

        +

        In a random library you would expect that there would be little to no difference +between the different bases of a sequence run, so the lines in this plot should +run parallel with each other. The relative amount of each base should reflect +the overall amount of these bases in your genome, but in any case they should +not be hugely imbalanced from each other.

        +

        It's worth noting that some types of library will always produce biased sequence +composition, normally at the start of the read. Libraries produced by priming +using random hexamers (including nearly all RNA-Seq libraries) and those which +were fragmented using transposases inherit an intrinsic bias in the positions +at which reads start. This bias does not concern an absolute sequence, but instead +provides enrichment of a number of different K-mers at the 5' end of the reads. +Whilst this is a true technical bias, it isn't something which can be corrected +by trimming and in most cases doesn't seem to adversely affect the downstream +analysis.

        +
        + +
        +
        +
        + + Click a sample row to see a line plot for that dataset. +
        +
        Rollover for sample name
        + +
        + Position: - +
        %T: -
        +
        %C: -
        +
        %A: -
        +
        %G: -
        +
        +
        +
        + +
        +
        +
        +
        + +
        +
        + + + + +
        + +

        + Per Sequence GC Content + + + +

        + +

        The average GC content of reads. Normal random libraries typically have a + roughly normal distribution of GC content.

        + + +
        +

        From the FastQC help:

        +

        This module measures the GC content across the whole length of each sequence +in a file and compares it to a modelled normal distribution of GC content.

        +

        In a normal random library you would expect to see a roughly normal distribution +of GC content where the central peak corresponds to the overall GC content of +the underlying genome. Since we don't know the GC content of the genome the +modal GC content is calculated from the observed data and used to build a +reference distribution.

        +

        An unusually shaped distribution could indicate a contaminated library or +some other kinds of biased subset. A normal distribution which is shifted +indicates some systematic bias which is independent of base position. If there +is a systematic bias which creates a shifted normal distribution then this won't +be flagged as an error by the module since it doesn't know what your genome's +GC content should be.

        +
        + +
        + + +
        + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Per Base N Content + + + +

        + +

        The percentage of base calls at each position for which an N was called.

        + + +
        +

        From the FastQC help:

        +

        If a sequencer is unable to make a base call with sufficient confidence then it will +normally substitute an N rather than a conventional base call. This graph shows the +percentage of base calls at each position for which an N was called.

        +

        It's not unusual to see a very low proportion of Ns appearing in a sequence, especially +nearer the end of a sequence. However, if this proportion rises above a few percent +it suggests that the analysis pipeline was unable to interpret the data well enough to +make valid base calls.

        +
        + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Sequence Length Distribution + +

        + +

        The distribution of fragment sizes (read lengths) found. + See the FastQC help

        + + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Sequence Duplication Levels + + + +

        + +

        The relative level of duplication found for every sequence.

        + + +
        +

        From the FastQC Help:

        +

        In a diverse library most sequences will occur only once in the final set. +A low level of duplication may indicate a very high level of coverage of the +target sequence, but a high level of duplication is more likely to indicate +some kind of enrichment bias (eg PCR over amplification). This graph shows +the degree of duplication for every sequence in a library: the relative +number of sequences with different degrees of duplication.

        +

        Only sequences which first appear in the first 100,000 sequences +in each file are analysed. This should be enough to get a good impression +for the duplication levels in the whole file. Each sequence is tracked to +the end of the file to give a representative count of the overall duplication level.

        +

        The duplication detection requires an exact sequence match over the whole length of +the sequence. Any reads over 75bp in length are truncated to 50bp for this analysis.

        +

        In a properly diverse library most sequences should fall into the far left of the +plot in both the red and blue lines. A general level of enrichment, indicating broad +oversequencing in the library will tend to flatten the lines, lowering the low end +and generally raising other categories. More specific enrichments of subsets, or +the presence of low complexity contaminants will tend to produce spikes towards the +right of the plot.

        +
        + +
        loading..
        +
        + +
        +
        + + + + +
        + +

        + Overrepresented sequences + + + +

        + +

        The total amount of overrepresented sequences found in each library.

        + + +
        +

        FastQC calculates and lists overrepresented sequences in FastQ files. It would not be +possible to show this for all samples in a MultiQC report, so instead this plot shows +the number of sequences categorized as overrepresented.

        +

        Sometimes, a single sequence may account for a large number of reads in a dataset. +To show this, the bars are split into two: the first shows the overrepresented reads +that come from the single most common sequence. The second shows the total count +from all remaining overrepresented sequences.

        +

        From the FastQC Help:

        +

        A normal high-throughput library will contain a diverse set of sequences, with no +individual sequence making up more than a tiny fraction of the whole. Finding that a single +sequence is very overrepresented in the set either means that it is highly biologically +significant, or indicates that the library is contaminated, or not as diverse as you expected.

        +

        FastQC lists all of the sequences which make up more than 0.1% of the total. +To conserve memory only sequences which appear in the first 100,000 sequences are tracked +to the end of the file. It is therefore possible that a sequence which is overrepresented +but doesn't appear at the start of the file for some reason could be missed by this module.

        +
        + +
        4 samples had less than 1% of reads made up of overrepresented sequences
        + +
        +
        + + + + +
        + +

        + Adapter Content + + + +

        + +

        The cumulative percentage count of the proportion of your + library which has seen each of the adapter sequences at each position.

        + + +
        +

        Note that only samples with ≥ 0.1% adapter contamination are shown.

        +

        There may be several lines per sample, as one is shown for each adapter +detected in the file.

        +

        From the FastQC Help:

        +

        The plot shows a cumulative percentage count of the proportion +of your library which has seen each of the adapter sequences at each position. +Once a sequence has been seen in a read it is counted as being present +right through to the end of the read so the percentages you see will only +increase as the read length goes on.

        +
        + +
        loading..
        +
        + + +
        + + +
        + + + + +
        + + + + + + + + + + + + + + + + diff --git a/software/trim_galore/test/input/test_R1.fastq.gz b/software/trim_galore/test/input/test_R1.fastq.gz new file mode 120000 index 00000000..e7b4b614 --- /dev/null +++ b/software/trim_galore/test/input/test_R1.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1.fastq.gz \ No newline at end of file diff --git a/software/trim_galore/test/input/test_R2.fastq.gz b/software/trim_galore/test/input/test_R2.fastq.gz new file mode 120000 index 00000000..5b7b57a3 --- /dev/null +++ b/software/trim_galore/test/input/test_R2.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2.fastq.gz \ No newline at end of file diff --git a/software/trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt b/software/trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt new file mode 100644 index 00000000..cf0d4bfa --- /dev/null +++ b/software/trim_galore/test/output/test_R1.fastq.gz_trimming_report.txt @@ -0,0 +1,97 @@ + +SUMMARISING RUN PARAMETERS +========================== +Input filename: test_R1.fastq.gz +Trimming mode: paired-end +Trim Galore version: 0.6.5 +Cutadapt version: 2.3 +Number of cores used for trimming: 1 +Quality Phred score cutoff: 20 +Quality encoding type selected: ASCII+33 +Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0) +Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected) +Maximum trimming error rate: 0.1 (default) +Minimum required adapter overlap (stringency): 1 bp +Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp +Output file will be GZIP compressed + + +This is cutadapt 2.3 with Python 3.7.3 +Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz +Processing reads on 1 core in single-end mode ... +Finished in 0.19 s (19 us/read; 3.12 M reads/minute). 
+ +=== Summary === + +Total reads processed: 10,000 +Reads with adapters: 3,225 (32.2%) +Reads written (passing filters): 10,000 (100.0%) + +Total basepairs processed: 760,000 bp +Quality-trimmed: 4,492 bp (0.6%) +Total written (filtered): 748,403 bp (98.5%) + +=== Adapter 1 === + +Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times. + +No. of allowed errors: +0-9 bp: 0; 10-12 bp: 1 + +Bases preceding removed adapters: + A: 23.8% + C: 28.2% + G: 22.7% + T: 25.3% + none/other: 0.0% + +Overview of removed sequences +length count expect max.err error counts +1 2170 2500.0 0 2170 +2 622 625.0 0 622 +3 223 156.2 0 223 +4 64 39.1 0 64 +5 14 9.8 0 14 +6 9 2.4 0 9 +7 8 0.6 0 8 +8 5 0.2 0 5 +9 4 0.0 0 4 +10 8 0.0 1 7 1 +11 3 0.0 1 3 +12 4 0.0 1 4 +13 6 0.0 1 6 +14 5 0.0 1 4 1 +15 5 0.0 1 5 +16 6 0.0 1 5 1 +17 3 0.0 1 3 +18 3 0.0 1 3 +19 1 0.0 1 1 +20 3 0.0 1 3 +21 7 0.0 1 7 +22 7 0.0 1 7 +23 3 0.0 1 3 +24 6 0.0 1 6 +25 4 0.0 1 4 +26 2 0.0 1 2 +27 4 0.0 1 4 +28 1 0.0 1 1 +29 3 0.0 1 3 +30 4 0.0 1 4 +32 3 0.0 1 3 +33 2 0.0 1 1 1 +34 1 0.0 1 1 +35 1 0.0 1 1 +40 1 0.0 1 1 +42 1 0.0 1 0 1 +45 1 0.0 1 0 1 +49 1 0.0 1 0 1 +52 1 0.0 1 0 1 +56 2 0.0 1 0 2 +59 1 0.0 1 0 1 +67 1 0.0 1 0 1 +70 2 0.0 1 0 2 + +RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz +============================================= +10000 sequences processed in total + diff --git a/software/trim_galore/test/output/test_R1_val_1.fq.gz b/software/trim_galore/test/output/test_R1_val_1.fq.gz new file mode 120000 index 00000000..88ccdc87 --- /dev/null +++ b/software/trim_galore/test/output/test_R1_val_1.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz \ No newline at end of file diff --git a/software/trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt b/software/trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt new file mode 100644 index 00000000..c9640b9b --- /dev/null +++ b/software/trim_galore/test/output/test_R2.fastq.gz_trimming_report.txt @@ -0,0 
+1,100 @@ + +SUMMARISING RUN PARAMETERS +========================== +Input filename: test_R2.fastq.gz +Trimming mode: paired-end +Trim Galore version: 0.6.5 +Cutadapt version: 2.3 +Number of cores used for trimming: 1 +Quality Phred score cutoff: 20 +Quality encoding type selected: ASCII+33 +Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0) +Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected) +Maximum trimming error rate: 0.1 (default) +Minimum required adapter overlap (stringency): 1 bp +Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp +Output file will be GZIP compressed + + +This is cutadapt 2.3 with Python 3.7.3 +Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz +Processing reads on 1 core in single-end mode ... +Finished in 0.22 s (22 us/read; 2.71 M reads/minute). + +=== Summary === + +Total reads processed: 10,000 +Reads with adapters: 3,295 (33.0%) +Reads written (passing filters): 10,000 (100.0%) + +Total basepairs processed: 760,000 bp +Quality-trimmed: 7,096 bp (0.9%) +Total written (filtered): 745,649 bp (98.1%) + +=== Adapter 1 === + +Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times. + +No. 
of allowed errors: +0-9 bp: 0; 10-12 bp: 1 + +Bases preceding removed adapters: + A: 22.6% + C: 28.2% + G: 23.6% + T: 25.6% + none/other: 0.0% + +Overview of removed sequences +length count expect max.err error counts +1 2213 2500.0 0 2213 +2 647 625.0 0 647 +3 239 156.2 0 239 +4 53 39.1 0 53 +5 10 9.8 0 10 +6 7 2.4 0 7 +7 8 0.6 0 8 +8 5 0.2 0 5 +9 5 0.0 0 5 +10 10 0.0 1 8 2 +11 2 0.0 1 2 +12 4 0.0 1 4 +13 7 0.0 1 7 +14 3 0.0 1 3 +15 4 0.0 1 4 +16 5 0.0 1 5 +17 3 0.0 1 3 +18 5 0.0 1 4 1 +19 2 0.0 1 1 1 +20 3 0.0 1 3 +21 7 0.0 1 7 +22 6 0.0 1 6 +23 3 0.0 1 3 +24 7 0.0 1 7 +25 4 0.0 1 4 +26 2 0.0 1 2 +27 4 0.0 1 4 +28 1 0.0 1 1 +29 3 0.0 1 3 +30 4 0.0 1 4 +32 3 0.0 1 3 +33 1 0.0 1 1 +34 1 0.0 1 1 +35 2 0.0 1 1 1 +40 1 0.0 1 0 1 +41 1 0.0 1 1 +46 1 0.0 1 0 1 +48 1 0.0 1 0 1 +49 2 0.0 1 0 2 +56 2 0.0 1 0 2 +59 1 0.0 1 0 1 +70 1 0.0 1 0 1 +73 2 0.0 1 0 2 + +RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz +============================================= +10000 sequences processed in total + +Total number of sequences analysed for the sequence pair length validation: 10000 + +Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%) diff --git a/software/trim_galore/test/output/test_R2_val_2.fq.gz b/software/trim_galore/test/output/test_R2_val_2.fq.gz new file mode 120000 index 00000000..440be644 --- /dev/null +++ b/software/trim_galore/test/output/test_R2_val_2.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz \ No newline at end of file