Add Sanger Pathogens' Assembly Improvement

This commit is contained in:
MillironX 2021-07-19 09:58:37 -05:00
parent 0c3b4ccb06
commit f05c179091
Signed by: millironx
GPG key ID: 139C07724802BC5D

View file

@ -0,0 +1,173 @@
Bootstrap: library
From: default/centos:latest
%post
# Quiet perl down
export LC_ALL=C
# Install package files: repositories
dnf install dnf-plugins-core -y
dnf install epel-release -y
dnf config-manager --set-enabled PowerTools
# Operating dependencies
dnf --setopt=install_weak_deps=False install procps perl python3 -y
# Building dependencies
dnf groupinstall 'Development Tools' -y
dnf install perl-App-cpanminus boost-devel cmake jsoncpp-devel jemalloc-devel expat-devel -y
# Install SSPACE
cd /opt || exit 1
git clone https://github.com/nsoranzo/sspace_basic.git
cd sspace_basic || exit 1
git checkout 4fe5c # Not available as release tarball
cd || exit 1
export PATH=$PATH:/opt/sspace_basic
# Install GapFiller
curl -L https://downloads.sourceforge.net/project/gapfiller/v2.1.2/gapfiller-2.1.2.tar.gz | tar xvz
cd gapfiller-2.1.2
./configure
make && make install
cd .. || exit 1
rm -rf gapfiller-2.1.2
# Install khmer
pip3 install khmer
# Install MUMmer
curl -L https://github.com/mummer4/mummer/releases/download/v4.0.0rc1/mummer-4.0.0rc1.tar.gz | tar xvz
cd mummer-4.0.0rc1 || exit 1
./configure
make && make install
cd .. || exit 1
rm -rf mummer-4.0.0rc1
# SGA uses an old C++ standard, so all of its dependencies must be compiled
# against that standard, too
export CFLAGS=-std=gnu++03
export CXXFLAGS=-std=gnu++03
export CPPFLAGS=-std=gnu++03
# Install Google sparsehash (dependency for SGA)
curl -L https://github.com/sparsehash/sparsehash/archive/refs/tags/sparsehash-2.0.4.tar.gz | tar xvz
cd sparsehash-sparsehash-2.0.4 || exit 1
./configure
make && make install
cd .. || exit 1
rm -rf sparsehash-sparsehash-2.0.4
# Install bamtools library (dependency for SGA)
curl -L https://github.com/pezmaster31/bamtools/archive/refs/tags/v2.5.2.tar.gz | tar xvz
cd bamtools-2.5.2 || exit 1
mkdir build && cd build || exit 1
# Dependency issue unique to RHEL/CentOS 8: https://github.com/ComplianceAsCode/content/issues/7016#issuecomment-845066366
dnf upgrade libarchive -y
cmake ..
make && make install
cd ../.. || exit 1
rm -rf bamtools-2.5.2
# Install ruffus (dependency for SGA)
pip3 install ruffus --upgrade
# Install pysam (dependency for SGA)
pip3 install pysam
# Install SGA
git clone https://github.com/jts/sga.git
cd sga/src || exit 1
git checkout 229293b # Contains compiler-blocking bug fixes that are not in the release tarball
./autogen.sh
./configure --with-bamtools=/usr/local
make && make install
cd ../.. || exit 1
rm -rf sga
# Unset the compiler flags
export CFLAGS=''
export CXXFLAGS=''
export CPPFLAGS=''
# QUASR is nowhere to be found, so skip installing it
# Install assembly_improvement's dependencies
cpanm -f Bio::AssemblyImprovement
# Install an up-to-date version of assembly_improvement
cd /opt || exit 1
curl -L https://github.com/sanger-pathogens/assembly_improvement/releases/download/v2021.01.04.08.24.00.726/Bio_AssemblyImprovement-2021.01.04.08.24.00.726.tar.gz | tar xvz
export PATH=$PATH:/opt/Bio_AssemblyImprovement-2021.01.04.08.24.00.726/bin
cd || exit 1
# Shim preloaded arguments into PATH
IMPROVE_PREFIX=/opt/Bio_AssemblyImprovement-2021.01.04.08.24.00.726/bin
SSPACE_PATH=/opt/sspace_basic/SSPACE_Basic.pl
GF_PATH=/usr/local/bin/GapFiller
ABACAS_PATH=/usr/local/bin/abacas.pl
SGA_PATH=/usr/local/bin/sga
cd /opt || exit 1
mkdir shims && cd shims || exit 1
echo "$IMPROVE_PREFIX/improve_assembly -s $SSPACE_PATH -g $GF_PATH -b $ABACAS_PATH \"\$@\"" > improve_assembly
echo "$IMPROVE_PREFIX/diginorm_with_khmer -py /usr/bin/python3 \"\$@\"" > diginorm_with_khmer
echo "$IMPROVE_PREFIX/fill_gaps_with_gapfiller -s $GF_PATH \"\$@\"" > fill_gaps_with_gapfiller
echo "$IMPROVE_PREFIX/order_contigs_with_abacas -b $ABACAS_PATH \"\$@\"" > order_contigs_with_abacas
echo "$IMPROVE_PREFIX/read_correction_with_sga -s $SGA_PATH \"\$@\"" > read_correction_with_sga
echo "$IMPROVE_PREFIX/scaffold_with_sspace -s $SSPACE_PATH \"\$@\"" > scaffold_with_sspace
echo "echo 'Error! QUASR is discontinued and not available in this container.'; exit 1" > remove_primers_with_quasr
chmod +x ./*
cd || exit 1
export PATH=/opt/shims:$PATH
# Clean up
dnf groupremove 'Development Tools' -y
dnf remove perl-App-cpanminus boost-devel cmake jsoncpp-devel jemalloc-devel expat-devel -y
dnf autoremove -y
dnf clean all
%environment
export LC_ALL=C
export PATH=$PATH:/opt/sspace_basic
export PATH=$PATH:/opt/Bio_AssemblyImprovement-2021.01.04.08.24.00.726/bin
export PATH=/opt/shims:$PATH
%test
# We can use bash notation here, since /bin/sh is symlinked to bash in CentOS
CMDS=('bash' 'ps' 'SSPACE_Basic.pl' 'GapFiller' 'abacas.pl' 'sga' 'improve_assembly' 'diginorm_with_khmer' 'fill_gaps_with_gapfiller' 'order_contigs_with_abacas' 'read_correction_with_sga' 'scaffold_with_sspace')
for CMD in "${CMDS[@]}"; do
if ! command -v "$CMD"; then
echo "command $CMD not found!"
FAILED_TESTS=1
fi
done
# Abort if a test failed
if [ -n "$FAILED_TESTS" ]; then
exit 1
fi
%runscript
/opt/Bio_AssemblyImprovement-2021.01.04.08.24.00.726/bin/improve_assembly -s /opt/sspace_basic/SSPACE_Basic.pl -g /usr/local/bin/GapFiller -b /usr/local/bin/abacas.pl "$@"
%labels
Author 25492070+MillironX@users.noreply.github.com
SoftwareVersion 1.160490
SingularityDefinitionVersion 1
%help
Assembly Improvement
====================
Take in an assembly in FASTA format, reads in FASTQ format, and make the
assembly better by scaffolding and gap filling.
**NOTE:** When using this container, **do not** use options that point to the
paths of binaries. These have already been included in the container, and
shims are setup to point to the containerized binaries already.
E.g. `improve_assembly` is shimmed to execute
`improve_assembly -s /opt/sspace_basic/SSPACE_Basic.pl -g /usr/local/bin/GapFiller -b /usr/local/bin/abacas.pl "$@"`,
so providing `-s`, `-g`, or `-b` will break the usage.
For more help, see
- <https://github.com/sanger-pathogens/assembly_improvement>
- <https://github.com/MillironX/singularity-builds>