mirror of
https://github.com/MillironX/sc2-sequencing.git
synced 2025-01-18 02:56:13 -05:00
88 lines
2.6 KiB
Julia
88 lines
2.6 KiB
Julia
|
#!/usr/bin/julia
|
||
|
# Renames the FASTQ files in a directory by prepending the well number based on
|
||
|
# a MiSeq run workbook
|
||
|
|
||
|
# Activate the proper packages
|
||
|
using Tk
|
||
|
import XLSX
|
||
|
using DataFrames
|
||
|
using Missings
|
||
|
|
||
|
# Prompt for the folder containing the FASTQs
|
||
|
fastq_folder = ChooseDirectory()
|
||
|
|
||
|
# Prompt for the Illumina workbook containing the well info
|
||
|
ont_workbook = GetOpenFile()
|
||
|
|
||
|
# Read in the excel file
|
||
|
xf = XLSX.readdata(ont_workbook, "Run Worksheet!B9:D56")
|
||
|
fastq_ids = DataFrame(xf, :auto)
|
||
|
rename!(fastq_ids, Symbol.(["SampleId", "Taxa", "Barcode"]))
|
||
|
dropmissing!(fastq_ids)
|
||
|
fastq_ids = string.(fastq_ids)
|
||
|
|
||
|
# Truncate the barcode number
|
||
|
fastq_ids.Barcode = last.(fastq_ids.Barcode, 2)
|
||
|
|
||
|
# Get all of the files
|
||
|
fastqs = readdir(fastq_folder)
|
||
|
|
||
|
# Find the FAP number somewhere in the excel workbook
|
||
|
# (The workbooks are not very uniform, so we need to go hunting)
|
||
|
fap_search = XLSX.readdata(ont_workbook, 1, "D1:K20")
|
||
|
fap_search = string.(skipmissing(fap_search))
|
||
|
fap_xl = ""
|
||
|
for fap in fap_search
|
||
|
global fap_xl
|
||
|
if first(fap, 3) == "FAP"
|
||
|
fap_xl = fap
|
||
|
continue
|
||
|
end
|
||
|
end
|
||
|
|
||
|
# Check if the FAP numbers from the directory and the Excel workbook match
|
||
|
fap_fl = split(fastqs[1], "_")[1]
|
||
|
if fap_fl != fap_xl
|
||
|
# Yikes! They don't match! Check if the user is ok with that
|
||
|
proceed = Messagebox(message=string(fap_fl,
|
||
|
" from the file system doesn't match ",
|
||
|
fap_xl,
|
||
|
" from the workbook. ",
|
||
|
"You might be renaming the wrong files. ",
|
||
|
"Rename anyway?"))
|
||
|
|
||
|
# The user clicked "cancel"
|
||
|
if proceed == "cancel"
|
||
|
exit()
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
# Iterate through each file
|
||
|
for fastq in fastqs
|
||
|
# Get the full path
|
||
|
fastq_path = joinpath(fastq_folder, fastq)
|
||
|
|
||
|
# Pull the barcode number and FAP number based on the file name
|
||
|
# The file name is divided into 5 chucks separated by underscores,
|
||
|
# The barcode number is the last two characters of the third chunk
|
||
|
FAP = split(fastq, "_")[1]
|
||
|
bc = last(split(fastq, "_")[3], 2)
|
||
|
|
||
|
# Find this id in the workbook
|
||
|
id_row = fastq_ids[fastq_ids.Barcode .== bc,:]
|
||
|
|
||
|
# If there are no matches, keep going
|
||
|
if size(id_row)[1] < 1
|
||
|
continue
|
||
|
end
|
||
|
|
||
|
# Construct the new filename
|
||
|
fastq_newname = string(FAP, "_pass_", id_row.SampleId[1], ".fastq.gz")
|
||
|
|
||
|
# Rename the file
|
||
|
mv(fastq_path, joinpath(fastq_folder, fastq_newname))
|
||
|
println(string("Renaming ", fastq_path, " to ", joinpath(fastq_folder, fastq_newname)))
|
||
|
|
||
|
end
|