From 5343dceb1f8eb50eb4c37a8bd68f14a004fea11b Mon Sep 17 00:00:00 2001
From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com>
Date: Thu, 16 May 2019 23:25:37 -0600
Subject: [PATCH] Added predictive modeling capabilities

After the ANCOM step, main.sh now starts one srun job step per line of
catcols.txt (running sample-classifier.sh) and one per line of
numcols.txt (running sample-regression.sh).
---
Review note: this patch text was re-flowed from a copy whose whitespace
had been collapsed. Leading indentation on the context lines of the
main.sh hunks is reconstructed and should be confirmed against main.sh;
if the hunks are rejected, retry with "git apply --ignore-whitespace".

 main.sh              | 14 +++++++++++--
 sample-classifier.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++
 sample-regression.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 sample-classifier.sh
 create mode 100644 sample-regression.sh

diff --git a/main.sh b/main.sh
index 1c12951..545d3e3 100644
--- a/main.sh
+++ b/main.sh
@@ -164,7 +164,7 @@ qiime composition add-pseudocount \
   --i-table feature-table.qza \
   --o-composition-table composition-table.qza
 
-# Run ancom for CowID, Age, TrmtGroup
+# Run ancom for all categories in catcols
 # Once again, QIIME only uses one processor (even though this
 # is a HUGE task), so we should parallelize it for speed
 cat catcols.txt | \
@@ -175,6 +175,16 @@ cat catcols.txt | \
   --m-metadata-column {} \
   --o-visualization "visualizations/ancom-{}.qzv" \
   --verbose
-echo "--^-- X: Performing ANCOM...Done!"
+echo "--^-- X: Performing ANCOM...Done!"
+
+# Create category-based predictive models, one job step per column
+# (-r: start nothing when the column list is empty)
+xargs -r -P"${SLURM_NTASKS:-1}" -L1 srun -n1 -N1 --exclusive \
+  ./sample-classifier.sh < catcols.txt
+
+# Create continuous predictive models, one job step per column
+# (-r: start nothing when the column list is empty)
+xargs -r -P"${SLURM_NTASKS:-1}" -L1 srun -n1 -N1 --exclusive \
+  ./sample-regression.sh < numcols.txt
 
 echo "All Done!"
\ No newline at end of file
diff --git a/sample-classifier.sh b/sample-classifier.sh
new file mode 100644
index 0000000..e2a9be9
--- /dev/null
+++ b/sample-classifier.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#SBATCH --account=cowusda2016
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=8G
+
+# Train a QIIME 2 sample classifier for one categorical metadata column
+# and tabulate its outputs.
+#
+# Usage: sample-classifier.sh <metadata-column>
+#
+# Reads feature-table.qza and metadata.tsv from the working directory and
+# writes every result into "<metadata-column>-classifier/". Exits non-zero
+# as soon as the cleanup or a qiime step fails.
+#
+# NOTE(review): #SBATCH lines are only read by sbatch. main.sh starts this
+# script through srun, so confirm the job step really gets the 4 CPUs that
+# --p-n-jobs below assumes.
+
+# Fetch the category we are working with from upstream. Abort with a usage
+# message when it is missing or empty: the folder name below would otherwise
+# be the bare "-classifier" suffix, which rm parses as a string of options.
+CATEGORY="${1:?usage: sample-classifier.sh <metadata-column>}"
+
+# Save the folder name we will be saving everything into
+FOLDERNAME="${CATEGORY}-classifier"
+
+# Load the required modules
+module restore system
+module load swset
+module load miniconda3
+
+# Start up qiime
+source activate qiime2
+
+# Make sure we have a clean slate to work with. -f keeps the first run quiet
+# (nothing to delete yet); -- protects column names that start with a dash.
+echo "--^-- X: Clearing previous classifier results..."
+rm -rf -- "$FOLDERNAME" || exit
+echo "--^-- X: Clearing previous classifier results...Done!"
+
+# Solve the model. Stop on failure so the tabulate steps never run against
+# missing artifacts and the caller (xargs/srun) sees a non-zero status.
+echo "--^-- X: Constructing model..."
+qiime sample-classifier classify-samples \
+  --i-table feature-table.qza \
+  --m-metadata-file metadata.tsv \
+  --m-metadata-column "$CATEGORY" \
+  --p-n-jobs 4 \
+  --p-missing-samples ignore \
+  --p-optimize-feature-selection \
+  --output-dir "$FOLDERNAME" \
+  --verbose || exit
+echo "--^-- X: Constructing model...Done!"
+
+# Convert the model output into readable visualizations
+echo "--^-- X: Making visualizations..."
+qiime metadata tabulate \
+  --m-input-file "${FOLDERNAME}/feature_importance.qza" \
+  --o-visualization "${FOLDERNAME}/feature-importance.qzv" || exit
+
+qiime metadata tabulate \
+  --m-input-file "${FOLDERNAME}/predictions.qza" \
+  --m-input-file metadata.tsv \
+  --o-visualization "${FOLDERNAME}/predictions.qzv" || exit
+echo "--^-- X: Making visualizations...Done!"
\ No newline at end of file
diff --git a/sample-regression.sh b/sample-regression.sh
new file mode 100644
index 0000000..75fa766
--- /dev/null
+++ b/sample-regression.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#SBATCH --account=cowusda2016
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=8G
+
+# Train a QIIME 2 sample regressor for one continuous metadata column
+# and tabulate its outputs.
+#
+# Usage: sample-regression.sh <metadata-column>
+#
+# Reads feature-table.qza and metadata.tsv from the working directory and
+# writes every result into "<metadata-column>-regression/". Exits non-zero
+# as soon as the cleanup or a qiime step fails.
+#
+# NOTE(review): #SBATCH lines are only read by sbatch. main.sh starts this
+# script through srun, so confirm the job step really gets the 4 CPUs that
+# --p-n-jobs below assumes.
+
+# Fetch the category we are working with from upstream. Abort with a usage
+# message when it is missing or empty: the folder name below would otherwise
+# be the bare "-regression" suffix, which rm parses as a string of options.
+CATEGORY="${1:?usage: sample-regression.sh <metadata-column>}"
+
+# Save the folder name we will be saving everything into
+FOLDERNAME="${CATEGORY}-regression"
+
+# Load the required modules
+module restore system
+module load swset
+module load miniconda3
+
+# Start up qiime
+source activate qiime2
+
+# Make sure we have a clean slate to work with. -f keeps the first run quiet
+# (nothing to delete yet); -- protects column names that start with a dash.
+echo "--^-- X: Clearing previous regression results..."
+rm -rf -- "$FOLDERNAME" || exit
+echo "--^-- X: Clearing previous regression results...Done!"
+
+# Solve the model. Stop on failure so the tabulate steps never run against
+# missing artifacts and the caller (xargs/srun) sees a non-zero status.
+echo "--^-- X: Constructing model..."
+qiime sample-classifier regress-samples \
+  --i-table feature-table.qza \
+  --m-metadata-file metadata.tsv \
+  --m-metadata-column "$CATEGORY" \
+  --p-n-jobs 4 \
+  --p-missing-samples ignore \
+  --p-optimize-feature-selection \
+  --output-dir "$FOLDERNAME" \
+  --verbose || exit
+echo "--^-- X: Constructing model...Done!"
+
+# Convert the model output into readable visualizations
+echo "--^-- X: Making visualizations..."
+qiime metadata tabulate \
+  --m-input-file "${FOLDERNAME}/feature_importance.qza" \
+  --o-visualization "${FOLDERNAME}/feature-importance.qzv" || exit
+
+qiime metadata tabulate \
+  --m-input-file "${FOLDERNAME}/predictions.qza" \
+  --m-input-file metadata.tsv \
+  --o-visualization "${FOLDERNAME}/predictions.qzv" || exit
+echo "--^-- X: Making visualizations...Done!"
\ No newline at end of file