sc2-sequencing/ont/ont-transfer

98 lines
3 KiB
Text
Raw Normal View History

#!/bin/bash
# Copies fast5 and fastq files from a GridION to the first available USB device
#
# Usage: ont-transfer [-s OFFSET] [FOLDER]
#
# Find the skipped barcodes parameter
case "${1}" in
  -s)
    # Explicit form: the offset follows the flag, the folder (if any) follows it
    OFFSET=${2}
    FOLDERIN=${3}
    ;;
  *)
    # No flag: the first argument (if any) is the folder and every barcode is kept
    printf '%s\n' \
      " Assuming no skipped barcodes" \
      " CTRL+C if incorrect and call 'ont-transfer -s [1|6|12]', instead"
    OFFSET=1
    FOLDERIN=${1}
    ;;
esac
# Find the directory we are supposed to copy
# Sets FOLDERPATH (absolute path of the run directory) and FOLDERNAME (its last
# path component). Exits with status 1 when no usable directory is available.
if [[ -z "$FOLDERIN" ]]; then
  # If no directory was passed, check to see if the current directory will work.
  # -print -quit stops at the first hit; we only need to know that one exists.
  if [[ -n $(find . -type d -name "fast5_pass" -print -quit) ]]; then
    FOLDERPATH="$PWD"
  else
    {
      echo " ERROR: Can't find FAST5s in current directory and no directory was passed."
      echo " ont-transfer must either be passed a directory or be called from a directory"
      echo " containing a 'fast5_pass' folder containing FAST5 files."
    } >&2
    exit 1
  fi
elif [[ -d "$FOLDERIN" ]]; then
  # Resolve to an absolute path so that arguments such as "." or "../run" still
  # produce a meaningful FOLDERNAME. CDPATH is cleared so cd cannot print or
  # jump somewhere unexpected.
  FOLDERPATH=$(CDPATH= cd -- "$FOLDERIN" && pwd) || exit 1
else
  # Fail early instead of creating empty output folders for a mistyped path
  echo " ERROR: '$FOLDERIN' is not a directory." >&2
  exit 1
fi
FOLDERNAME=$(basename -- "$FOLDERPATH")
# Calculate the barcodes whose files need to be copied
#
# select_barcodes OFFSET
#   Prints the zero-padded barcode numbers to keep, one per line:
#     OFFSET <= 1 (or empty) -> every barcode, 01 through 96
#     OFFSET 2..6            -> 01-06, 13-18, 25-30, ... 85-90
#     OFFSET >= 7            -> 07-12, 19-24, 31-36, ... 91-96
select_barcodes() {
  local offset=${1:-1}
  local first size stride start n

  if [[ "$offset" -gt 1 ]]; then
    # Keep six barcodes out of every twelve; the offset picks which six
    size=6
    stride=12
    if [[ "$offset" -lt 7 ]]; then
      first=1
    else
      first=7
    fi
  else
    # A single group covering the whole plate
    first=1
    size=96
    stride=96
  fi

  for ((start = first; start <= 96; start += stride)); do
    for ((n = start; n < start + size; ++n)); do
      printf '%02d\n' "$n"
    done
  done
}

# KEEPERS holds the selected barcodes, in ascending order
KEEPERS=()
while IFS= read -r BARCODE; do
  KEEPERS+=("$BARCODE")
done < <(select_barcodes "$OFFSET")
# Find where the flash drive is mounted
# Note that this only works on GridIONs thanks to goofy mounting
# The mount point (3rd field of `mount` output) of the last entry mentioning
# /data is taken to be the USB device.
USBDRIVE=$(mount | grep /data | awk '{print $3}' | tail -n1)
# Refuse to continue when:
#   * nothing under /data is mounted at all (USBDRIVE is empty), which would
#     otherwise send the output to "/$FOLDERNAME" on the root filesystem, or
#   * the last /data mount is a "scratch" one, which is treated as not being
#     a USB device.
if [[ -z "$USBDRIVE" || "$USBDRIVE" == *scratch* ]]; then
  echo "No USB drive was detected. Exiting now." >&2
  exit 1
fi
# Prompt the users
# Announce the planned transfer, then pause so the operator can abort
printf '%s\n' \
  "Extracting FAST5/FASTQ from $FOLDERNAME to USB Device $USBDRIVE, skipping after $OFFSET." \
  "If this is not correct, press CTRL+C within the next 10 seconds to abort..."
sleep 10
printf '%s\n' "Proceeding..."
# Simplify the output paths
# Everything is written beneath a folder on the USB device named after the run
USBPATH="$USBDRIVE/$FOLDERNAME"
FAST5PATH="$USBPATH/fast5"
FASTQPATH="$USBPATH/fastq"
# Make output directories
# Stop here if they cannot be created (read-only or full device, for example)
# rather than letting every later copy fail one file at a time.
if ! mkdir -p -- "$FAST5PATH" "$FASTQPATH"; then
  echo " ERROR: Could not create output directories under '$USBPATH'." >&2
  exit 1
fi
# Copy the files
#
# transfer_barcode BARCODE
#   Transfers the passing reads of one barcode (zero-padded, e.g. "01"):
#     * every matching .fast5 under $FOLDERPATH is copied into $FAST5PATH
#       (existing files are not overwritten)
#     * the barcode's FASTQs are concatenated into a single gzipped file in
#       $FASTQPATH
#   Reads the globals FOLDERPATH, FOLDERNAME, FAST5PATH and FASTQPATH.
#   Does nothing when there are no passing FAST5 files for the barcode.
#   Side effect: every source FASTQ of the barcode is left gzipped in place.
transfer_barcode() {
  local barcode=$1
  # One pattern for every lookup so they all agree on what belongs to a barcode
  local pattern="*_pass*barcode${barcode}*"
  local first_fast5 ontid fastq_result
  local -a fastqs

  # Check to see if there are any files here
  first_fast5=$(find "$FOLDERPATH" -type f -name "*.fast5" -path "$pattern" -print -quit)
  [[ -n "$first_fast5" ]] || return 0

  # Find the uid that the GridION gives to this sample
  # (the 4th "_"-separated field of a FAST5 file name)
  ontid=$(basename -- "$first_fast5" | awk -F_ '{print $4}')

  # Copy FAST5s
  find "$FOLDERPATH" -name "*.fast5" -path "$pattern" -exec cp -n -- {} "$FAST5PATH" \;

  # Unzip any gzipped fastqs
  find "$FOLDERPATH" -name "*.fastq.gz" -path "$pattern" -print0 |
    xargs -0 -r -n 1 -P 0 gunzip

  # Get the name of the resulting FASTQ file
  fastq_result="$FASTQPATH/${FOLDERNAME}_pass_barcode${barcode}_${ontid}_0.fastq"

  # Concatenate the fastqs
  # An unmatched glob stays as the literal pattern, which does not exist on
  # disk; in that case skip the FASTQ instead of writing an empty archive.
  fastqs=("$FOLDERPATH"/*/fastq_pass/"barcode${barcode}"/*.fastq)
  if [[ -e "${fastqs[0]}" ]]; then
    cat -- "${fastqs[@]}" > "$fastq_result"
  else
    echo " WARNING: No FASTQ files found for barcode${barcode}; only FAST5s were copied." >&2
  fi

  # Zip up the remainders
  find "$FOLDERPATH" -name "*.fastq" -path "$pattern" -print0 |
    xargs -0 -r -n 1 -P 0 gzip

  # Zip up the copied fastq
  if [[ -e "$fastq_result" ]]; then
    gzip -f -- "$fastq_result"
  fi
}

for FASTA in "${KEEPERS[@]}"; do
  transfer_barcode "$FASTA"
done