#!/bin/bash
# Copies fast5 and fastq files from a GridION to the first available USB device

# Find the skipped barcodes parameter
#######################################
# Parse the command line into OFFSET and FOLDERIN.
# Globals:   OFFSET (written), FOLDERIN (written)
# Arguments: [-s OFFSET] [DIRECTORY]
# Outputs:   a notice on stdout when -s is absent; an error on stderr when
#            -s is not followed by a number
# Exits:     1 when -s is not followed by a non-negative integer
#######################################
parse_args() {
  if [[ "${1:-}" == "-s" ]]; then
    # Without this check, 'ont-transfer -s <directory>' would store the
    # directory as the offset and leave the directory empty.
    if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
      echo " ERROR: '-s' must be followed by a number, e.g. 'ont-transfer -s [1|6|12]'" >&2
      exit 1
    fi
    # 10# forces decimal so zero-padded input such as 08 is accepted.
    OFFSET=$((10#$2))
    FOLDERIN=${3:-}
  else
    echo " Assuming no skipped barcodes"
    echo " CTRL+C if incorrect and call 'ont-transfer -s [1|6|12]', instead"
    OFFSET=1
    FOLDERIN=${1:-}
  fi
}

parse_args "$@"

# Find the directory we are supposed to copy
# Sets FOLDERPATH (where to search) and FOLDERNAME (its last path component).
if [[ -z "$FOLDERIN" ]]; then
  # If no directory was passed, check to see if the current directory will work
  if [[ -n $(find . -type d -name "fast5_pass" -print -quit) ]]; then
    FOLDERPATH="$PWD"
    FOLDERNAME=$(basename -- "$PWD")
  else
    {
      echo " ERROR: Can't find FAST5s in current directory and no directory was passed."
      echo " ont-transfer must either be passed a directory or be called from a directory"
      echo " containing a 'fast5_pass' folder containing FAST5 files."
    } >&2
    exit 1
  fi
elif [[ -d "$FOLDERIN" ]]; then
  # Resolve to an absolute path so that inputs such as "." or ".." still
  # yield a real directory name for FOLDERNAME.
  FOLDERPATH=$(CDPATH='' cd -- "$FOLDERIN" && pwd) || exit 1
  FOLDERNAME=$(basename -- "$FOLDERPATH")
else
  echo " ERROR: '$FOLDERIN' is not a directory." >&2
  exit 1
fi

# Calculate the barcodes that need to be copied
#######################################
# Populate KEEPERS with the two-digit barcode numbers to transfer.
# Globals:   KEEPERS (written)
# Arguments: $1 - skip offset:
#                   0 or 1 -> every barcode, 01 through 96
#                   2 to 6 -> the first six of every twelve (01-06, 13-18, ...)
#                   7 or more -> the last six of every twelve (07-12, 19-24, ...)
#                 Anything that is not a non-negative integer selects
#                 every barcode.
#######################################
build_keepers() {
  local offset=0 first block slot padded

  # 10# forces decimal so zero-padded values such as 08 are not rejected
  # as invalid octal numbers.
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    offset=$((10#$1))
  fi

  KEEPERS=()
  if (( offset <= 1 )); then
    for (( slot = 1; slot <= 96; slot++ )); do
      printf -v padded '%02d' "$slot"
      KEEPERS+=("$padded")
    done
    return 0
  fi

  if (( offset < 7 )); then
    first=1
  else
    first=7
  fi

  # Eight groups of twelve cover barcodes 01-96; keep six from each group.
  for (( block = 0; block < 96; block += 12 )); do
    for (( slot = first; slot < first + 6; slot++ )); do
      printf -v padded '%02d' "$(( block + slot ))"
      KEEPERS+=("$padded")
    done
  done
}

build_keepers "$OFFSET"

# Find where the flash drive is mounted
# Note that this only works on GridIONs thanks to goofy mounting
# Takes the mount point (third field of `mount` output) from the last line
# that mentions /data.
USBDRIVE=$(mount | awk '/\/data/ {print $3}' | tail -n1)

# An empty result means nothing matched at all; continuing would build
# output paths directly under "/". A mount point containing "scratch" is
# likewise treated as "no USB drive".
if [[ -z "$USBDRIVE" || "$USBDRIVE" == *scratch* ]]; then
  echo "No USB drive was detected. Exiting now." >&2
  exit 1
fi

# Prompt the users
# Announce the planned transfer, then pause for 10 seconds so the operator
# can abort with CTRL+C before anything is copied.
printf '%s\n' \
  "Extracting FAST5/FASTQ from $FOLDERNAME to USB Device $USBDRIVE, skipping after $OFFSET." \
  "If this is not correct, press CTRL+C within the next 10 seconds to abort..."
sleep 10
printf '%s\n' "Proceeding..."

# Simplify the output paths
# Abort if no drive was detected: with an empty USBDRIVE the paths below
# would resolve to directories directly under "/".
: "${USBDRIVE:?no USB drive mount point set; refusing to create output directories}"
USBPATH="$USBDRIVE/$FOLDERNAME"
FAST5PATH="$USBPATH/fast5"
FASTQPATH="$USBPATH/fastq"

# Make output directories
# Stop here if the drive is not writable instead of failing once per file
# later on.
if ! mkdir -p -- "$FAST5PATH" "$FASTQPATH"; then
  echo "ERROR: could not create output directories under $USBPATH" >&2
  exit 1
fi

# Copy the files
#######################################
# Transfer the passed reads of one barcode to the output directories.
# Does nothing when no passed FAST5 file exists for the barcode.
# Globals:   FOLDERPATH, FOLDERNAME, FAST5PATH, FASTQPATH (read)
#            ONTID, FASTQRESULT (written)
# Arguments: $1 - two-digit barcode number, e.g. 01
# Side effects: gzipped FASTQs of this barcode under FOLDERPATH are
#            unzipped in place and every FASTQ is gzipped again afterwards.
#######################################
copy_barcode() {
  local barcode=$1
  local match="*_pass*barcode${barcode}*"
  local first_fast5
  local -a name_parts

  # Check to see if there are any files here
  first_fast5=$(find "$FOLDERPATH" -type f -name "*.fast5" -path "$match" -print -quit)
  [[ -n "$first_fast5" ]] || return 0

  # Find the uid that the GridION gives to this sample: the fourth
  # "_"-separated field of the FAST5 file name.
  IFS=_ read -r -a name_parts <<<"${first_fast5##*/}"
  ONTID=${name_parts[3]:-}

  # Copy FAST5s (cp -n: never overwrite a file already on the drive)
  find "$FOLDERPATH" -name "*.fast5" -path "$match" -exec cp -n -- {} "$FAST5PATH" \;

  # Unzip any gzipped fastqs
  find "$FOLDERPATH" -name "*.fastq.gz" -path "$match" -print0 \
    | xargs -0 -r -n 1 -P 0 gunzip

  # Get the name of the resulting FASTQ file
  FASTQRESULT="$FASTQPATH/${FOLDERNAME}_pass_barcode${barcode}_${ONTID}_0.fastq"

  # Concatenate the fastqs
  # NOTE: unlike the find calls above, this only looks exactly one directory
  # level below FOLDERPATH.
  cat -- "$FOLDERPATH"/*/fastq_pass/"barcode${barcode}"/*.fastq >"$FASTQRESULT"

  # Zip up the remainders
  find "$FOLDERPATH" -name "*.fastq" -path "$match" -print0 \
    | xargs -0 -r -n 1 -P 0 gzip

  # Zip up the copied fastq
  gzip -f -- "$FASTQRESULT"
}

for FASTA in "${KEEPERS[@]}"; do
  copy_barcode "$FASTA"
done