sc2-sequencing/ont/ont-rename.jl

88 lines
2.6 KiB
Julia
Raw Normal View History

2021-04-22 12:39:45 -06:00
#!/usr/bin/julia
# Renames the FASTQ files in a directory by prepending the well number based on
# a MiSeq run workbook
# Activate the proper packages
using Tk
import XLSX
using DataFrames
using Missings
# Prompt for the folder containing the FASTQs
fastq_folder = ChooseDirectory()
# Prompt for the Illumina workbook containing the well info
ont_workbook = GetOpenFile()
# Read in the excel file
xf = XLSX.readdata(ont_workbook, "Run Worksheet!B9:D56")
fastq_ids = DataFrame(xf, :auto)
rename!(fastq_ids, Symbol.(["SampleId", "Taxa", "Barcode"]))
dropmissing!(fastq_ids)
fastq_ids = string.(fastq_ids)
# Truncate the barcode number
fastq_ids.Barcode = last.(fastq_ids.Barcode, 2)
# Get all of the files
fastqs = readdir(fastq_folder)
# Find the FAP number somewhere in the excel workbook
# (The workbooks are not very uniform, so we need to go hunting)
fap_search = XLSX.readdata(ont_workbook, 1, "D1:K20")
fap_search = string.(skipmissing(fap_search))
fap_xl = ""
for fap in fap_search
global fap_xl
if first(fap, 3) == "FAP"
fap_xl = fap
continue
end
end
# Check if the FAP numbers from the directory and the Excel workbook match
fap_fl = split(fastqs[1], "_")[1]
if fap_fl != fap_xl
# Yikes! They don't match! Check if the user is ok with that
proceed = Messagebox(message=string(fap_fl,
" from the file system doesn't match ",
fap_xl,
" from the workbook. ",
"You might be renaming the wrong files. ",
"Rename anyway?"))
# The user clicked "cancel"
if proceed == "cancel"
exit()
end
end
# Iterate through each file
for fastq in fastqs
# Get the full path
fastq_path = joinpath(fastq_folder, fastq)
# Pull the barcode number and FAP number based on the file name
# The file name is divided into 5 chucks separated by underscores,
# The barcode number is the last two characters of the third chunk
FAP = split(fastq, "_")[1]
bc = last(split(fastq, "_")[3], 2)
# Find this id in the workbook
id_row = fastq_ids[fastq_ids.Barcode .== bc,:]
# If there are no matches, keep going
if size(id_row)[1] < 1
continue
end
# Construct the new filename
fastq_newname = string(FAP, "_pass_", id_row.SampleId[1], ".fastq.gz")
# Rename the file
mv(fastq_path, joinpath(fastq_folder, fastq_newname))
println(string("Renaming ", fastq_path, " to ", joinpath(fastq_folder, fastq_newname)))
end