mirror of
https://github.com/MillironX/sc2-sequencing.git
synced 2024-12-22 03:08:17 +00:00
Create ONT renaming script
This commit is contained in:
parent
42d3d542b9
commit
471bf4314d
4 changed files with 155 additions and 15 deletions
10
ont/Project.toml
Normal file
10
ont/Project.toml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
name = "ONTRename"
|
||||||
|
uuid = "e92456f0-0b2d-4004-b474-3ee287651c88"
|
||||||
|
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||||
|
version = "0.1.0"
|
||||||
|
|
||||||
|
[deps]
|
||||||
|
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||||
|
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
||||||
|
Tk = "4225ea8a-3324-57e7-9ddc-5798a2cb9eab"
|
||||||
|
XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
|
|
@ -8,18 +8,6 @@ contain some "faults" to work around these idiosyncrasies.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### `ont-demux`
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ont-demux FAP_FOLDER
|
|
||||||
```
|
|
||||||
|
|
||||||
Consolidates all FAST5s into a single gzipped FASTQ using ONT's
|
|
||||||
`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000
|
|
||||||
reads/file. It is _highly_ recommended to set the "Number of Reads per File"
|
|
||||||
entry in MinKNOW to 0, and checking "Compress FASTQs" to produce nearly the same
|
|
||||||
results.
|
|
||||||
|
|
||||||
### `ont-transfer`
|
### `ont-transfer`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -29,11 +17,43 @@ ont-transfer [-s 1|6|12] FAP_FOLDER
|
||||||
Transfers all of the passing FAST5 and FASTQ files to the first available USB
|
Transfers all of the passing FAST5 and FASTQ files to the first available USB
|
||||||
drive, skipping empty barcodes based on the number of the first skip passed
|
drive, skipping empty barcodes based on the number of the first skip passed
|
||||||
through the `-s` parameter, consolidating all files of the same type into one
|
through the `-s` parameter, consolidating all files of the same type into one
|
||||||
folder for easier sorting and uploading.
|
folder for easier sorting and uploading, and concatenating and compressing FASTQ
|
||||||
|
files into one per barcode.
|
||||||
|
|
||||||
|
### `ont-rename.jl`
|
||||||
|
|
||||||
|
To be run with Julia.
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
julia --project=PATH_TO_ONT_FOLDER PATH_TO_ONT-RENAME.jl
|
||||||
|
```
|
||||||
|
|
||||||
|
In practice, this looks like
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
julia --project=C:\Users\MillironX\ont C:\Users\MillironX\illumina\ont-rename.jl
|
||||||
|
```
|
||||||
|
|
||||||
|
### `ont-demux` (deprecated)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ont-demux FAP_FOLDER
|
||||||
|
```
|
||||||
|
|
||||||
|
**This functionality is now wrapped into `ont-transfer`, which can produce the
|
||||||
|
same results using far fewer resources and in far less time.**
|
||||||
|
|
||||||
|
Consolidates all FAST5s into a single gzipped FASTQ using ONT's
|
||||||
|
`guppy_barcoder`. Note that this is for MinKNOW's default settings of 4000
|
||||||
|
reads/file. It is _highly_ recommended to set the "Number of Reads per File"
|
||||||
|
entry in MinKNOW to 0, and to check "Compress FASTQs" to produce nearly the same
|
||||||
|
results.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
### Requirements
|
### `ont-transfer` and `ont-demux`
|
||||||
|
|
||||||
|
#### Requirements
|
||||||
|
|
||||||
- git
|
- git
|
||||||
- guppy (Get from [ONT Community](https://nanoporetech.com/community)
|
- guppy (Get from [ONT Community](https://nanoporetech.com/community)
|
||||||
|
@ -46,7 +66,7 @@ a terminal and running
|
||||||
sudo apt-get install git parallel -y
|
sudo apt-get install git parallel -y
|
||||||
```
|
```
|
||||||
|
|
||||||
### Instructions
|
#### Instructions
|
||||||
|
|
||||||
Open a terminal (Ctrl+Alt+T), and use the following commands
|
Open a terminal (Ctrl+Alt+T), and use the following commands
|
||||||
|
|
||||||
|
@ -58,3 +78,25 @@ cp wphl-bioinformatics/ont/* ~/bin
|
||||||
chmod +x ~/bin/*
|
chmod +x ~/bin/*
|
||||||
source ~/.bashrc
|
source ~/.bashrc
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `ont-rename`
|
||||||
|
|
||||||
|
This script requires [Julia](https://julialang.org).
|
||||||
|
|
||||||
|
1. Download and install Julia
|
||||||
|
1. Select "Add Julia to PATH" as an option
|
||||||
|
2. All other default options will work
|
||||||
|
2. Download the Repo's ZIP file (click green button on upper-right)
|
||||||
|
3. Press `Win+R`, type `%USERPROFILE%` and press Enter
|
||||||
|
4. Copy the ont folder from the ZIP file into the newly opened folder
|
||||||
|
5. Press `Win+X`, then `I` to open a new PowerShell window
|
||||||
|
6. Type `julia --project=$env:USERPROFILE\ont` and press Enter
|
||||||
|
7. Press the `]` key, the prompt should change to say `(ONTRename) pkg>`
|
||||||
|
8. Run `instantiate`
|
||||||
|
9. Press the backspace key to return to the `julia>` prompt
|
||||||
|
10. Run `exit()`
|
||||||
|
11. Close PowerShell
|
||||||
|
12. Right-click the Windows desktop, and click "New | Shortcut..."
|
||||||
|
13. Type `julia --project=%USERPROFILE%\ont
|
||||||
|
%USERPROFILE%\ont\ont-rename.jl` and click Next
|
||||||
|
14. Type `Rename ONT FASTQs` or something else memorable and click "Finish"
|
||||||
|
|
87
ont/ont-rename.jl
Normal file
87
ont/ont-rename.jl
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
#!/usr/bin/julia
|
||||||
|
# Renames the ONT FASTQ files in a directory to FAP_pass_SAMPLEID.fastq.gz by
|
||||||
|
# matching each file's barcode against a run workbook
|
||||||
|
|
||||||
|
# Activate the proper packages
|
||||||
|
using Tk
|
||||||
|
import XLSX
|
||||||
|
using DataFrames
|
||||||
|
using Missings
|
||||||
|
|
||||||
|
# Prompt for the folder containing the FASTQs
fastq_folder = ChooseDirectory()

# The Tk dialog hands back an empty string when it is dismissed; stop here
# instead of failing later with a confusing error about an empty path
if isempty(fastq_folder)
    println("No FASTQ folder selected. Nothing to do.")
    exit()
end

# Prompt for the Illumina workbook containing the well info
ont_workbook = GetOpenFile()

# Same cancel handling as for the folder prompt
if isempty(ont_workbook)
    println("No workbook selected. Nothing to do.")
    exit()
end

# Read in the excel file
# Cells B9:D56 of the "Run Worksheet" sheet are read as three columns, which are
# named SampleId, Taxa and Barcode below
xf = XLSX.readdata(ont_workbook, "Run Worksheet!B9:D56")
fastq_ids = DataFrame(xf, :auto)
rename!(fastq_ids, Symbol.(["SampleId", "Taxa", "Barcode"]))

# Rows with any empty cell are unused slots in the worksheet
dropmissing!(fastq_ids)

# Everything is compared as text from here on
fastq_ids = string.(fastq_ids)

# Truncate the barcode number
# (only the last two characters are kept, to match the file-name barcode)
fastq_ids.Barcode = last.(fastq_ids.Barcode, 2)

# Get all of the files
fastqs = readdir(fastq_folder)
|
||||||
|
|
||||||
|
# Find the FAP number somewhere in the excel workbook
# (The workbooks are not very uniform, so we need to go hunting)
fap_search = XLSX.readdata(ont_workbook, 1, "D1:K20")

# Drop the empty cells and turn whatever is left into text
fap_search = string.(skipmissing(fap_search))

# Take the first cell whose text starts with "FAP" and stop looking there;
# fap_xl stays empty when no cell in the searched range qualifies
fap_idx = findfirst(cell -> startswith(cell, "FAP"), fap_search)
fap_xl = fap_idx === nothing ? "" : fap_search[fap_idx]
|
||||||
|
|
||||||
|
# Check if the FAP numbers from the directory and the Excel workbook match

# An empty folder has no first file to inspect, and nothing to rename either
if isempty(fastqs)
    println(string("No files found in ", fastq_folder, ". Nothing to rename."))
    exit()
end

# The FAP number is taken from the first underscore-separated chunk of the
# first file name in the folder
fap_fl = split(fastqs[1], "_")[1]
if fap_fl != fap_xl
    # Yikes! They don't match! Check if the user is ok with that
    proceed = Messagebox(message=string(fap_fl,
                                        " from the file system doesn't match ",
                                        fap_xl,
                                        " from the workbook. ",
                                        "You might be renaming the wrong files. ",
                                        "Rename anyway?"))

    # The user clicked "cancel"
    if proceed == "cancel"
        exit()
    end
end
|
||||||
|
|
||||||
|
# Iterate through each file
for fastq in fastqs
    # Get the full path
    fastq_path = joinpath(fastq_folder, fastq)

    # Pull the barcode number and FAP number based on the file name
    # The file name is divided into 5 chunks separated by underscores,
    # The barcode number is the last two characters of the third chunk
    chunks = split(fastq, "_")

    # Files that don't follow the naming scheme carry no barcode; leave them be
    if length(chunks) < 3
        continue
    end
    FAP = chunks[1]
    bc = last(chunks[3], 2)

    # Find this id in the workbook
    id_row = fastq_ids[fastq_ids.Barcode .== bc, :]

    # If there are no matches, keep going
    if size(id_row, 1) < 1
        continue
    end

    # Construct the new filename (the first matching workbook row wins)
    fastq_newname = string(FAP, "_pass_", id_row.SampleId[1], ".fastq.gz")
    fastq_newpath = joinpath(fastq_folder, fastq_newname)

    # mv refuses to overwrite and would abort the whole run half-way through,
    # so report the collision and carry on with the remaining files
    if ispath(fastq_newpath)
        println(string("Skipping ", fastq_path, ": ", fastq_newpath, " already exists"))
        continue
    end

    # Rename the file (announce it first so a failing move is easy to trace)
    println(string("Renaming ", fastq_path, " to ", fastq_newpath))
    mv(fastq_path, fastq_newpath)
end
|
1
ont/src/ONTRename.jl
Normal file
1
ont/src/ONTRename.jl
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# This file intentionally empty
|
Loading…
Reference in a new issue