From 5343dceb1f8eb50eb4c37a8bd68f14a004fea11b Mon Sep 17 00:00:00 2001
From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com>
Date: Thu, 16 May 2019 23:25:37 -0600
Subject: [PATCH] Added predictive modeling capabilities

---
 main.sh              | 14 +++++++++++--
 sample-classifier.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++
 sample-regression.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 sample-classifier.sh
 create mode 100644 sample-regression.sh

diff --git a/main.sh b/main.sh
index 1c12951..545d3e3 100644
--- a/main.sh
+++ b/main.sh
@@ -164,7 +164,7 @@ qiime composition add-pseudocount \
  --i-table feature-table.qza \
  --o-composition-table composition-table.qza
 
-# Run ancom for CowID, Age, TrmtGroup
+# Run ANCOM for every category listed in catcols.txt
 # Once again, QIIME only uses one processor (even though this
 # is a HUGE task), so we should parallelize it for speed
 cat catcols.txt | \
@@ -175,6 +175,16 @@ cat catcols.txt | \
    --m-metadata-column {} \
    --o-visualization "visualizations/ancom-{}.qzv" \
    --verbose
-echo "--^-- X: Performing ANCOM...Done!"	 
+echo "--^-- X: Performing ANCOM...Done!"
+
+# Create category-based predictive models. The helper scripts are added with
+# mode 100644 (no execute bit), so hand them to bash instead of exec'ing them.
+cat catcols.txt | xargs -P"$SLURM_NTASKS" -L1 \
+  srun -n1 -N1 --exclusive bash ./sample-classifier.sh
+
+# Create continuous predictive models (run through bash for the same reason:
+# sample-regression.sh is added with mode 100644)
+cat numcols.txt | xargs -P"$SLURM_NTASKS" -L1 \
+  srun -n1 -N1 --exclusive bash ./sample-regression.sh
 
 echo "All Done!"
\ No newline at end of file
diff --git a/sample-classifier.sh b/sample-classifier.sh
new file mode 100644
index 0000000..e2a9be9
--- /dev/null
+++ b/sample-classifier.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#SBATCH --account=cowusda2016
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=8G
+# Usage: sample-classifier.sh <metadata-column>
+# Trains a classifier for that column; results go in <metadata-column>-classifier
+
+# Fetch the category we're working with from upstream; abort if it is missing,
+# because the folder we delete and recreate below is named after it
+CATEGORY=${1:?"usage: sample-classifier.sh <metadata-column>"}
+FOLDERNAME="${CATEGORY}-classifier"
+
+# Load the required modules
+module restore system
+module load swset
+module load miniconda3
+# Start up qiime
+source activate qiime2
+
+# Make sure we have a clean slate to work with
+# (-f: nothing to remove on a first run; --: the name may start with a dash)
+echo "--^-- X: Clearing previous classifier results..."
+rm -rf -- "$FOLDERNAME"
+echo "--^-- X: Clearing previous classifier results...Done!"
+
+# Solve the model; bail out on failure instead of reporting success
+echo "--^-- X: Constructing model..."
+qiime sample-classifier classify-samples \
+ --i-table feature-table.qza \
+ --m-metadata-file metadata.tsv \
+ --m-metadata-column "$CATEGORY" \
+ --p-n-jobs 4 \
+ --p-missing-samples ignore \
+ --p-optimize-feature-selection \
+ --output-dir "$FOLDERNAME" \
+ --verbose || exit
+echo "--^-- X: Constructing model...Done!"
+
+# Convert the model output into readable visualizations
+echo "--^-- X: Making visualizations..."
+qiime metadata tabulate \
+ --m-input-file "${FOLDERNAME}/feature_importance.qza" \
+ --o-visualization "${FOLDERNAME}/feature-importance.qzv"
+qiime metadata tabulate \
+ --m-input-file "${FOLDERNAME}/predictions.qza" \
+ --m-input-file metadata.tsv \
+ --o-visualization "${FOLDERNAME}/predictions.qzv"
+echo "--^-- X: Making visualizations...Done!"
\ No newline at end of file
diff --git a/sample-regression.sh b/sample-regression.sh
new file mode 100644
index 0000000..75fa766
--- /dev/null
+++ b/sample-regression.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#SBATCH --account=cowusda2016
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=8G
+# Usage: sample-regression.sh <metadata-column>
+# Fits a regressor for that column; results go in <metadata-column>-regression
+
+# Fetch the numeric column we're working with from upstream; abort if it is
+# missing, because the folder we delete and recreate below is named after it
+CATEGORY=${1:?"usage: sample-regression.sh <metadata-column>"}
+FOLDERNAME="${CATEGORY}-regression"
+
+# Load the required modules
+module restore system
+module load swset
+module load miniconda3
+# Start up qiime
+source activate qiime2
+
+# Make sure we have a clean slate to work with
+# (-f: nothing to remove on a first run; --: the name may start with a dash)
+echo "--^-- X: Clearing previous regression results..."
+rm -rf -- "$FOLDERNAME"
+echo "--^-- X: Clearing previous regression results...Done!"
+
+# Solve the model; bail out on failure instead of reporting success
+echo "--^-- X: Constructing model..."
+qiime sample-classifier regress-samples \
+ --i-table feature-table.qza \
+ --m-metadata-file metadata.tsv \
+ --m-metadata-column "$CATEGORY" \
+ --p-n-jobs 4 \
+ --p-missing-samples ignore \
+ --p-optimize-feature-selection \
+ --output-dir "$FOLDERNAME" \
+ --verbose || exit
+echo "--^-- X: Constructing model...Done!"
+
+# Convert the model output into readable visualizations
+echo "--^-- X: Making visualizations..."
+qiime metadata tabulate \
+ --m-input-file "${FOLDERNAME}/feature_importance.qza" \
+ --o-visualization "${FOLDERNAME}/feature-importance.qzv"
+qiime metadata tabulate \
+ --m-input-file "${FOLDERNAME}/predictions.qza" \
+ --m-input-file metadata.tsv \
+ --o-visualization "${FOLDERNAME}/predictions.qzv"
+echo "--^-- X: Making visualizations...Done!"
\ No newline at end of file