From d1fadc00a9a9642738e6b94ff72535e2cc9be994 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 16 May 2019 22:44:54 -0600 Subject: [PATCH] Made parallel processing more efficent --- main.sh | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/main.sh b/main.sh index 3bc9166..b8d5f8e 100644 --- a/main.sh +++ b/main.sh @@ -1,7 +1,8 @@ #!/bin/bash #SBATCH --account=cowusda2016 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=32G +#SBATCH --cpus-per-task=4 +#SBATCH --mem=8G +#SBATCH --ntasks=32 #SBATCH --time="3-00:00:00" # DEPENDENCIES: @@ -12,7 +13,6 @@ # Modules to load module load swset module load gcc -module load parallel module load miniconda3 module load metaxa2 module load r @@ -24,7 +24,7 @@ module load r # enough cpus to run echo "--^-- X: Reading FASTQ sequences..." find . -maxdepth 1 -name "*R1_001.fastq.gz" | \ -parallel -j 4 ./fastq-to-taxonomy.sh + xargs -L1 -P"$SLURM_NTASKS" srun -n1 -N1 --exclusive ./fastq-to-taxonomy.sh echo "--^-- X: Reading FASTQ sequences...Done!" # Compile those pesky individual taxonomic tables into a single @@ -33,7 +33,6 @@ echo "--^-- X: Compiling feature table..." metaxa2_dc -i *.level_7.txt -o metaxa-feature-table.tsv echo "--^-- X: Compiling feature table...Done!" - # Rearrange the feature table to something QIIME likes a little bit better echo "--^-- X: Rearranging feature table..." Rscript ./manipulatefeaturetable.R @@ -82,7 +81,7 @@ qiime diversity core-metrics \ --i-table feature-table.qza \ --p-sampling-depth "$RAREFACTION" \ --m-metadata-file metadata.tsv \ - --p-n-jobs 16 \ + --p-n-jobs 4 \ --output-dir core-metrics-results \ --verbose echo "--^-- X: Running core-metrics...Done!" @@ -137,14 +136,15 @@ echo "--^-- X: Finding alpha-correlations...Done!" # Now for the tricky part: beta-diversity echo "--^-- X: Checking entries for beta-significance..." # QIIME only uses one processor for these, so we can parallelize this step -parallel -i -a catcols.txt \ - qiime diversity beta-group-significance \ - --i-distance-matrix core-metrics-results/bray_curtis_distance_matrix.qza \ - --m-metadata-file metadata.tsv \ - --m-metadata-column {} \ - --p-pairwise \ - --o-visualization "visualizations/bray-curtis-{}-significance.qzv" \ - --verbose +cat catcols.txt | \ + xargs -P"$SLURM_NTASKS" -I {} srun -n1 -N1 --exclusive \ + qiime diversity beta-group-significance \ + --i-distance-matrix core-metrics-results/bray_curtis_distance_matrix.qza \ + --m-metadata-file metadata.tsv \ + --m-metadata-column {} \ + --p-pairwise \ + --o-visualization "visualizations/bray-curtis-{}-significance.qzv" \ + --verbose echo "--^-- X: Checking entries for beta-significance...Done!" echo "--^-- X: Performing ANCOM..." @@ -157,13 +157,14 @@ qiime composition add-pseudocount \ # Run ancom for CowID, Age, TrmtGroup # Once again, QIIME only uses one processor (even though this # is a HUGE task), so we should parallelize it for speed -parallel -i -a catcols.txt \ - qiime composition ancom \ - --i-table composition-table.qza \ - --m-metadata-file metadata.tsv \ - --m-metadata-column {} \ - --o-visualization "visualizations/ancom-{}.qzv" \ - --verbose +cat catcols.txt | \ + xargs -P"$SLURM_NTASKS" -I {} srun -n1 -N1 --exclusive \ + qiime composition ancom \ + --i-table composition-table.qza \ + --m-metadata-file metadata.tsv \ + --m-metadata-column {} \ + --o-visualization "visualizations/ancom-{}.qzv" \ + --verbose echo "--^-- X: Performing ANCOM...Done!" echo "All Done!" \ No newline at end of file