mirror of
https://github.com/MillironX/sc2-sequencing.git
synced 2024-12-22 03:08:17 +00:00
Create ONT renaming script
This commit is contained in:
parent
42d3d542b9
commit
471bf4314d
4 changed files with 155 additions and 15 deletions
10
ont/Project.toml
Normal file
10
ont/Project.toml
Normal file
|
@ -0,0 +1,10 @@
|
|||
name = "ONTRename"
|
||||
uuid = "e92456f0-0b2d-4004-b474-3ee287651c88"
|
||||
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||
version = "0.1.0"
|
||||
|
||||
[deps]
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
||||
Tk = "4225ea8a-3324-57e7-9ddc-5798a2cb9eab"
|
||||
XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
|
|
@ -8,18 +8,6 @@ contain some "faults" to work around these idiosyncrasies.
|
|||
|
||||
## Usage
|
||||
|
||||
### `ont-demux`
|
||||
|
||||
```bash
|
||||
ont-demux FAP_FOLDER
|
||||
```
|
||||
|
||||
Consolidates all FAST5s into a single gzipped FASTQ using ONT's
|
||||
`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000
|
||||
reads/file. It is _highly_ recommended to set the "Number of Reads per File"
|
||||
entry in MinKNOW to 0, and checking "Compress FASTQs" to produce nearly the same
|
||||
results.
|
||||
|
||||
### `ont-transfer`
|
||||
|
||||
```bash
|
||||
|
@ -29,11 +17,43 @@ ont-transfer [-s 1|6|12] FAP_FOLDER
|
|||
Transfers all of the passing FAST5 and FASTQ files to the first available USB
|
||||
drive, skipping empty barcodes based on the number of the first skip passed
|
||||
through the `-s` parameter, consolidating all files of the same type into one
|
||||
folder for easier sorting and uploading.
|
||||
folder for easier sorting and uploading, and concatenating and compressing FASTQ
|
||||
files into one per barcode.
|
||||
|
||||
### `ont-rename.jl`
|
||||
|
||||
To be run with Julia.
|
||||
|
||||
```powershell
|
||||
julia --project=PATH_TO_ONT_FOLDER PATH_TO_ONT-RENAME.jl
|
||||
```
|
||||
|
||||
In practice, this looks like
|
||||
|
||||
```powershell
|
||||
julia --project=C:\Users\MillironX\ont C:\Users\MillironX\illumina\ont-rename.jl
|
||||
```
|
||||
|
||||
### `ont-demux` (deprecated)
|
||||
|
||||
```bash
|
||||
ont-demux FAP_FOLDER
|
||||
```
|
||||
|
||||
**This functionality is now wrapped into `ont-transfer`, which can produce the
|
||||
same results using far fewer resources and in far less time.**
|
||||
|
||||
Consolidates all FAST5s into a single gzipped FASTQ using ONT's
|
||||
`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000
|
||||
reads/file. It is _highly_ recommended to set the "Number of Reads per File"
|
||||
entry in MinKNOW to 0, and to check "Compress FASTQs" to produce nearly the same
|
||||
results.
|
||||
|
||||
## Installation
|
||||
|
||||
### Requirements
|
||||
### `ont-transfer` and `ont-demux`
|
||||
|
||||
#### Requirements
|
||||
|
||||
- git
|
||||
- guppy (Get from [ONT Community](https://nanoporetech.com/community))
|
||||
|
@ -46,7 +66,7 @@ a terminal and running
|
|||
sudo apt-get install git parallel -y
|
||||
```
|
||||
|
||||
### Instructions
|
||||
#### Instructions
|
||||
|
||||
Open a terminal (Ctrl+Alt+T), and use the following commands
|
||||
|
||||
|
@ -58,3 +78,25 @@ cp wphl-bioinformatics/ont/* ~/bin
|
|||
chmod +x ~/bin/*
|
||||
source ~/.bashrc
|
||||
```
|
||||
|
||||
### `ont-rename`
|
||||
|
||||
These scripts require [Julia](https://julialang.org).
|
||||
|
||||
1. Download and install Julia
|
||||
1. Select "Add Julia to PATH" as an option
|
||||
2. All other default options will work
|
||||
2. Download the Repo's ZIP file (click green button on upper-right)
|
||||
3. Press `Win+R`, type `%USERPROFILE%` and press Enter
|
||||
4. Copy the ont folder from the ZIP file into the newly opened folder
|
||||
5. Press `Win+X`, then `I` to open a new PowerShell window
|
||||
6. Type `julia --project=$env:USERPROFILE\ont` and press Enter
|
||||
7. Press the `]` key, the prompt should change to say `(ONTRename) pkg>`
|
||||
8. Run `instantiate`
|
||||
9. Press the backspace key to return to the `julia>` prompt
|
||||
10. Run `exit()`
|
||||
11. Close PowerShell
|
||||
12. Right-click the Windows desktop, and click "New | Shortcut..."
|
||||
13. Type `julia --project=%USERPROFILE%\ont
|
||||
%USERPROFILE%\ont\ont-rename.jl` and click Next
|
||||
14. Type `Rename ONT FASTQs` or something else memorable and click "Finish"
|
||||
|
|
87
ont/ont-rename.jl
Normal file
87
ont/ont-rename.jl
Normal file
|
@ -0,0 +1,87 @@
|
|||
#!/usr/bin/julia
|
||||
# Renames the FASTQ files in a directory by prepending the well number based on
|
||||
# a MiSeq run workbook
|
||||
|
||||
# Activate the proper packages
|
||||
using Tk
|
||||
import XLSX
|
||||
using DataFrames
|
||||
using Missings
|
||||
|
||||
# Prompt for the folder containing the FASTQs
fastq_folder = ChooseDirectory()

# A dismissed dialog hands back an empty string; stop quietly here instead of
# failing further down with a stack trace
if isempty(fastq_folder)
    println("No FASTQ folder was selected. Nothing to do.")
    exit()
end

# Prompt for the Illumina workbook containing the well info
ont_workbook = GetOpenFile()

# Same treatment for a dismissed file dialog
if isempty(ont_workbook)
    println("No workbook was selected. Nothing to do.")
    exit()
end

# Read in the excel file: three columns (sample id, taxa, barcode) taken from
# the fixed range B9:D56 of the "Run Worksheet" sheet
xf = XLSX.readdata(ont_workbook, "Run Worksheet!B9:D56")
fastq_ids = DataFrame(xf, :auto)
rename!(fastq_ids, [:SampleId, :Taxa, :Barcode])

# Rows with any empty cell are unused wells; drop them, then turn every
# remaining cell into a string so it can be compared against file names
dropmissing!(fastq_ids)
fastq_ids = string.(fastq_ids)

# Truncate the barcode number: only its last two characters are kept
fastq_ids.Barcode = last.(fastq_ids.Barcode, 2)

# Get all of the files
fastqs = readdir(fastq_folder)
|
||||
|
||||
# Find the FAP number somewhere in the excel workbook
# (The workbooks are not very uniform, so a block of cells on the first sheet
# is scanned rather than one fixed cell)
fap_search = string.(skipmissing(XLSX.readdata(ont_workbook, 1, "D1:K20")))

# Every non-empty cell is examined, so when several cells begin with "FAP" the
# last one visited is the one kept; fap_xl stays empty when none do
fap_matches = filter(cell -> first(cell, 3) == "FAP", fap_search)
fap_xl = isempty(fap_matches) ? "" : last(fap_matches)
|
||||
|
||||
# Check if the FAP numbers from the directory and the Excel workbook match

# An empty folder has no first file to inspect and nothing to rename either,
# so stop here rather than indexing into an empty list
if isempty(fastqs)
    println("No files were found in the selected folder. Nothing to rename.")
    exit()
end

# The FAP number is the first underscore-separated chunk of the first file name
fap_fl = first(split(first(fastqs), "_"))

if fap_fl != fap_xl
    # Yikes! They don't match! Check if the user is ok with that
    proceed = Messagebox(message=string(fap_fl,
                                        " from the file system doesn't match ",
                                        fap_xl,
                                        " from the workbook. ",
                                        "You might be renaming the wrong files. ",
                                        "Rename anyway?"))

    # The user clicked "cancel"
    if proceed == "cancel"
        exit()
    end
end
|
||||
|
||||
# Iterate through each file, renaming those whose barcode appears in the workbook
for fastq in fastqs
    # Pull the barcode number and FAP number based on the file name
    # The file name is divided into chunks separated by underscores,
    # the FAP number is the first chunk and the barcode number is the last two
    # characters of the third chunk
    chunks = split(fastq, "_")

    # Anything without at least three chunks cannot carry a barcode; skip it
    # instead of aborting halfway through the folder
    if length(chunks) < 3
        continue
    end
    FAP = chunks[1]
    bc = last(chunks[3], 2)

    # Find this id in the workbook (the first matching row is used)
    id_index = findfirst(barcode -> barcode == bc, fastq_ids.Barcode)

    # If there are no matches, keep going
    if id_index === nothing
        continue
    end

    # Construct the old and new full paths
    fastq_newname = string(FAP, "_pass_", fastq_ids.SampleId[id_index], ".fastq.gz")
    fastq_path = joinpath(fastq_folder, fastq)
    fastq_newpath = joinpath(fastq_folder, fastq_newname)

    # mv refuses to overwrite an existing path; leave both files alone and
    # carry on with the rest of the folder
    if ispath(fastq_newpath)
        println(string("Skipping ", fastq_path, ": ", fastq_newpath, " already exists"))
        continue
    end

    # Rename the file, announcing it first so a failure can be traced to a file
    println(string("Renaming ", fastq_path, " to ", fastq_newpath))
    mv(fastq_path, fastq_newpath)
end
|
1
ont/src/ONTRename.jl
Normal file
1
ont/src/ONTRename.jl
Normal file
|
@ -0,0 +1 @@
|
|||
# This file intentionally empty
|
Loading…
Reference in a new issue