diff --git a/ont/Project.toml b/ont/Project.toml new file mode 100644 index 0000000..baba8c7 --- /dev/null +++ b/ont/Project.toml @@ -0,0 +1,10 @@ +name = "ONTRename" +uuid = "e92456f0-0b2d-4004-b474-3ee287651c88" +authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"] +version = "0.1.0" + +[deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +Tk = "4225ea8a-3324-57e7-9ddc-5798a2cb9eab" +XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" diff --git a/ont/README.md b/ont/README.md index 419bc66..395d392 100644 --- a/ont/README.md +++ b/ont/README.md @@ -8,18 +8,6 @@ contain some "faults" to work around these idiosyncrasies. ## Usage -### `ont-demux` - -```bash -ont-demux FAP_FOLDER -``` - -Consolidates all FAST5s into a single gzipped FASTQ using ONT's -`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000 -reads/file. It is _highly_ recommended to set the "Number of Reads per File" -entry in MinKNOW to 0, and checking "Compress FASTQs" to produce nearly the same -results. - ### `ont-transfer` ```bash @@ -29,11 +17,43 @@ ont-transfer [-s 1|6|12] FAP_FOLDER Transfers all of the passing FAST5 and FASTQ files to the first available USB drive, skipping empty barcodes based on the number of the first skip passed through the `-s` parameter, consolidating all files of the same type into one -folder for easier sorting and uploading. +folder for easier sorting and uploading, and concatenating and compressing FASTQ +files into one per barcode. + +### `ont-rename.jl` + +To be run with Julia. 
+ +```powershell +julia --project=PATH_TO_ONT_FOLDER PATH_TO_ONT-RENAME.jl +``` + +In practice, this looks like + +```powershell +julia --project=C:\Users\MillironX\ont C:\Users\MillironX\ont\ont-rename.jl +``` + +### `ont-demux` (deprecated) + +```bash +ont-demux FAP_FOLDER +``` + +**This functionality is now wrapped into `ont-transfer`, which can produce the +same results using far fewer resources and in far less time.** + +Consolidates all FAST5s into a single gzipped FASTQ using ONT's +`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000 +reads/file. It is _highly_ recommended to set the "Number of Reads per File" +entry in MinKNOW to 0, and check "Compress FASTQs" to produce nearly the same +results. ## Installation -### Requirements +### `ont-transfer` and `ont-demux` + +#### Requirements - git - guppy (Get from [ONT Community](https://nanoporetech.com/community) @@ -46,7 +66,7 @@ a terminal and running sudo apt-get install git parallel -y ``` -### Instructions +#### Instructions Open a terminal (Ctrl+Alt+T), and use the following commands @@ -58,3 +78,25 @@ cp wphl-bioinformatics/ont/* ~/bin chmod +x ~/bin/* source ~/.bashrc ``` + +### `ont-rename` + +These scripts require [Julia](https://julialang.org). + +1. Download and install Julia + 1. Select "Add Julia to PATH" as an option + 2. All other default options will work +2. Download the repo's ZIP file (click green button on upper-right) +3. Press `Win+R`, type `%USERPROFILE%` and press Enter +4. Copy the ont folder from the ZIP file into the newly opened folder +5. Press `Win+X`, then `I` to open a new PowerShell window +6. Type `julia --project=$env:USERPROFILE\ont` and press Enter +7. Press the `]` key, the prompt should change to say `(ONTRename) pkg>` +8. Run `instantiate` +9. Press the backspace key to return to the `julia>` prompt +10. Run `exit()` +11. Close PowerShell +12. Right-click the Windows desktop, and click "New | Shortcut..." +13. 
Type `julia --project=%USERPROFILE%\ont
+    %USERPROFILE%\ont\ont-rename.jl` and click Next
+14. Type `Rename ONT FASTQs` or something else memorable and click "Finish"
diff --git a/ont/ont-rename.jl b/ont/ont-rename.jl
new file mode 100644
index 0000000..e073bde
--- /dev/null
+++ b/ont/ont-rename.jl
@@ -0,0 +1,104 @@
+#!/usr/bin/julia
+# Renames the FASTQ files in a directory to `<FAP>_pass_<SampleId>.fastq.gz`,
+# looking up each file's sample id by its barcode in the run workbook
+
+# Activate the proper packages
+using Tk
+import XLSX
+using DataFrames
+using Missings
+
+# Prompt for the folder containing the FASTQs
+fastq_folder = ChooseDirectory()
+
+# Tk's dialogs return an empty string when the user cancels them
+if isempty(fastq_folder)
+    exit()
+end
+
+# Prompt for the Illumina workbook containing the well info
+ont_workbook = GetOpenFile()
+
+# Nothing can be looked up without a workbook, so a cancel ends the script
+if isempty(ont_workbook)
+    exit()
+end
+
+# Read in the excel file
+xf = XLSX.readdata(ont_workbook, "Run Worksheet!B9:D56")
+fastq_ids = DataFrame(xf, :auto)
+rename!(fastq_ids, Symbol.(["SampleId", "Taxa", "Barcode"]))
+dropmissing!(fastq_ids)
+fastq_ids = string.(fastq_ids)
+
+# Truncate the barcode number
+fastq_ids.Barcode = last.(fastq_ids.Barcode, 2)
+
+# Get all of the files
+fastqs = readdir(fastq_folder)
+
+# An empty folder has no file name to read the FAP number from
+if isempty(fastqs)
+    Messagebox(message=string("No files were found in ", fastq_folder))
+    exit()
+end
+
+# Find the FAP number somewhere in the excel workbook
+# (The workbooks are not very uniform, so we need to go hunting)
+fap_search = XLSX.readdata(ont_workbook, 1, "D1:K20")
+fap_search = string.(skipmissing(fap_search))
+
+# If several cells start with "FAP" the last one wins; no match leaves ""
+fap_matches = filter(cell -> startswith(cell, "FAP"), fap_search)
+fap_xl = isempty(fap_matches) ? "" : last(fap_matches)
+
+# Check if the FAP numbers from the directory and the Excel workbook match
+fap_fl = split(first(fastqs), "_")[1]
+if fap_fl != fap_xl
+    # Yikes! They don't match! Check if the user is ok with that
+    proceed = Messagebox(message=string(fap_fl,
+                                        " from the file system doesn't match ",
+                                        fap_xl,
+                                        " from the workbook. ",
+                                        "You might be renaming the wrong files. ",
+                                        "Rename anyway?"))
+
+    # The user clicked "cancel"
+    if proceed == "cancel"
+        exit()
+    end
+end
+
+# Iterate through each file
+for fastq in fastqs
+    # The file name is divided into chunks separated by underscores: the FAP
+    # number is the first chunk, and the barcode number is the last two
+    # characters of the third chunk
+    chunks = split(fastq, "_")
+
+    # A name with fewer than three chunks is not one of the expected FASTQs, so
+    # skip it instead of crashing on the missing barcode chunk
+    if length(chunks) < 3
+        continue
+    end
+
+    FAP = chunks[1]
+    bc = last(chunks[3], 2)
+
+    # Find this barcode in the workbook (the first matching row is used)
+    id_idx = findfirst(isequal(bc), fastq_ids.Barcode)
+
+    # If there are no matches, keep going
+    if id_idx === nothing
+        continue
+    end
+
+    # Construct the old and new paths
+    fastq_path = joinpath(fastq_folder, fastq)
+    fastq_newname = string(FAP, "_pass_", fastq_ids.SampleId[id_idx], ".fastq.gz")
+    fastq_newpath = joinpath(fastq_folder, fastq_newname)
+
+    # Announce the rename first so a failed move still shows which file it was
+    println(string("Renaming ", fastq_path, " to ", fastq_newpath))
+    mv(fastq_path, fastq_newpath)
+end
diff --git a/ont/src/ONTRename.jl b/ont/src/ONTRename.jl
new file mode 100644
index 0000000..ca8c402
--- /dev/null
+++ b/ont/src/ONTRename.jl
@@ -0,0 +1 @@
+# This file intentionally empty