diff --git a/.gitignore b/.gitignore index 9e8bc2d..0c65fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,175 @@ FAP012345 + +# Files generated by invoking Julia with --code-coverage +*.jl.cov +*.jl.*.cov + +# Files generated by invoking Julia with --track-allocation +*.jl.mem + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. +Manifest.toml + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
diff --git a/illumina/Project.toml b/illumina/Project.toml
new file mode 100644
index 0000000..2c78e6c
--- /dev/null
+++ b/illumina/Project.toml
@@ -0,0 +1,10 @@
+name = "illumina"
+uuid = "312f6f2e-1d90-4382-8764-8510b154b64c"
+authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
+version = "0.1.0"
+
+[deps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+Tk = "4225ea8a-3324-57e7-9ddc-5798a2cb9eab"
+XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
diff --git a/illumina/rename-fastqs.jl b/illumina/rename-fastqs.jl
new file mode 100644
index 0000000..fc706cb
--- /dev/null
+++ b/illumina/rename-fastqs.jl
@@ -0,0 +1,89 @@
+#!/usr/bin/julia
+# Renames the FASTQ files in a directory by prepending the well number.
+#
+# Prompts for a folder of FASTQ files and an Illumina workbook, reads the
+# SampleId/RunName/WellNum columns from the workbook, deletes every `.json`
+# file in the folder, and renames each file whose leading characters equal a
+# SampleId to `<swapped well>_<original name>`.
+
+# Activate the proper packages
+using Tk
+import XLSX
+using DataFrames
+using Missings
+
+# Worksheet range that is read into the SampleId, RunName and WellNum columns
+const SAMPLE_SHEET_RANGE = "IEM_SampleSheet!B19:D300"
+
+# Number of leading file-name characters compared against SampleId
+const SAMPLE_ID_LENGTH = 21
+
+"""
+    swap_well(well)
+
+Return `well` with its first character moved behind the remaining characters.
+
+Leading zeros of the remainder are dropped, so `"A1"` and `"A01"` both give
+`"1A"`, while `"A10"` gives `"10A"` and `"H12"` gives `"12H"`.
+"""
+function swap_well(well::AbstractString)
+    lead = first(well, 1)
+    rest = chop(well; head = 1, tail = 0)
+    # Keep every character after the first one: taking only the last character
+    # would turn "A10" into "0A" and make "A01" and "A11" collide on "1A".
+    trimmed = lstrip(rest, '0')
+    # An all-zero remainder keeps a single zero instead of vanishing.
+    return string(isempty(trimmed) ? last(rest, 1) : trimmed, lead)
+end
+
+# Prompt for the folder containing the FASTQs
+fastq_folder = ChooseDirectory()
+# NOTE(review): presumably Tk returns "" when the dialog is cancelled -- confirm
+isempty(fastq_folder) && error("No FASTQ folder was selected")
+
+# Prompt for the Illumina workbook containing the well info
+illumina_workbook = GetOpenFile()
+# NOTE(review): presumably "" is returned here too when cancelled -- confirm
+isempty(illumina_workbook) && error("No Illumina workbook was selected")
+
+# Read in the excel file
+xf = XLSX.readdata(illumina_workbook, SAMPLE_SHEET_RANGE)
+fastq_ids = DataFrame(xf, :auto)
+rename!(fastq_ids, [:SampleId, :RunName, :WellNum])
+# Rows with an empty cell cannot be matched or used, so drop them
+dropmissing!(fastq_ids)
+# Compare and concatenate everything as text
+fastq_ids = string.(fastq_ids)
+
+# Get all of the files
+fastqs = readdir(fastq_folder)
+
+# Iterate through each file
+for fastq in fastqs
+    # Get the full path
+    fastq_path = joinpath(fastq_folder, fastq)
+
+    # Delete if this is a JSON file
+    if endswith(fastq, ".json")
+        rm(fastq_path)
+        println("Deleting ", fastq_path)
+        continue
+    end
+
+    # Pull the id based on the file name
+    id = first(fastq, SAMPLE_ID_LENGTH)
+
+    # Find this id in the workbook; the first matching row is used
+    row_index = findfirst(isequal(id), fastq_ids.SampleId)
+
+    # If there are no matches, keep going
+    row_index === nothing && continue
+
+    # Construct the new filename
+    well_num = swap_well(fastq_ids.WellNum[row_index])
+    fastq_newpath = joinpath(fastq_folder, string(well_num, "_", fastq))
+
+    # Rename the file
+    mv(fastq_path, fastq_newpath)
+    println("Renaming ", fastq_path, " to ", fastq_newpath)
+end