-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathq2-core.sh
executable file
·282 lines (261 loc) · 10.5 KB
/
q2-core.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/bin/bash
################################standardlized pipeline for qiime2 core analysis
#Yan Hui
###############################################################################
# Wrapped function, e.g. usage()
usage () {
echo ""
echo "Note: This script conducts qiime2 core analysis according to a subset mapping file."
echo ""
echo "Usage: $0 [ -m -g -b -a -t -j -h]"
echo " -m, --metadata Required, load one subset mapping file, accepted by Qiime 2."
echo " e.g. mapping_demo.tsv _ is needed to extract subset naming."
echo " -g, --group Required, the category for comparison."
echo " -p, --preset Preset arguments to direct analysis the amplicon output from KU food."
echo " -b, --biom Required if -p not applied, tsv file for biom-like feature table."
echo " -a, --annotation Required if -p not applied, taxonomy annotation."
echo " -t, --tree Required if -p not applied, phenogenetic tree."
echo " -j, --jobs overwrite if -p applied, the number of threads."
echo " -h, --help Optional, help message."
echo ""
echo "Example:"
echo "$0 -m /path/to/mapping.file -g CategoryGroup -b /path/to/biom.tsv -a /path/to/taxonomy.tsv -t /path/to/tree.nwk -j Threads"
echo "$0 -m /path/to/mapping.file -g CategoryGroup -p"
echo ""
echo "";}
#############################################################################
# Check input, ensure alphabet/numbers behind -/--, and at least one option
if [ $# -eq 0 ] || ! [[ $* =~ ^(-|--)[a-z] ]]; then
echo "Invalid use: please check the help message below." ; usage; exit 1; fi
# Params loading
args=$(getopt --long "preset,metadata:,group:,biom:,annotation:,tree:,jobs:,help" -o "pm:g:b:a:t:j:h" -n "Input error" -- "$@")
# Ensure corrected input of params
if [ $? -ne 0 ]; then usage; exit 1; fi
eval set -- "$args"
while true ; do
case "$1" in
-m|--metadata) METADATA="$2"; shift 2;;
-g|--group) GROUP="$2"; shift 2;;
-b|--biom) BIOM="$2"; shift 2;;
-a|--annotation) ANNOTATION="$2"; shift 2;;
-t|--tree) TREE="$2"; shift 2;;
-j|--jobs) JOBS="$2"; shift 2;;
-p|--preset) PRESET=true; shift 1;; # Indicator changed
-h|--help) usage; exit 1; shift 1;;
*) break;;
esac
done
#############################################################################
# Paratermer initialization
if [ "$PRESET" == true ]; then
PREPATH=$(find ./ -type d -name "Results*")
if [ -z "$BIOM" ]; then
BIOM="$PREPATH/OTU-tables/zOTU_table_GG.txt"
fi
if [ -z "$ANNOTATION" ]; then
ANNOTATION="$PREPATH/taxonomy/greengenes_taxa.txt"
fi
if [ -z "$TREE" ]; then
TREE="$PREPATH/trees/zOTU.tree"
fi
if [ -z "$JOBS" ]; then
JOBS=1
fi
fi
source activate qiime2-2020.11
###############################################################################
# generate a random name to avoid collapse in parallel use
QIIME2ANA=qiime2Ana_${RANDOM}
mkdir -p "$QIIME2ANA"
cp "$BIOM" "$QIIME2ANA/biom.tsv"
cp "$ANNOTATION" "$QIIME2ANA/taxonomy_raw.tsv"
cp "$TREE" "$QIIME2ANA/tree.nwk"
cp "$METADATA" "$QIIME2ANA/metadata.tsv"
# create the aggregated metadata for grouped abundance plot
python ./q2meta-grouped.py -m "$METADATA" -g "$GROUP" -o "$QIIME2ANA/metadata_Group.tsv"
# extract subset metadata naming
FILE_NAME=$(basename -- "$METADATA")
FILE_NAME2=${FILE_NAME%%.*}
A=${FILE_NAME2##*_}
cd "$QIIME2ANA"
# re-transfroming txt to tsv using qiime2 default biom-format
biom convert -i biom.tsv -o table.biom --table-type="OTU table" --to-hdf5
##############################################################################################################################
# start qiime2 post-analysis
# import .biom otu table, taxonomy and tree file
# import .biom otu table in terms of biom-format version (2.1.7)
qiime tools import \
--input-path table.biom \
--type 'FeatureTable[Frequency]' \
--input-format BIOMV210Format \
--output-path feature-table_raw.qza
# filter the samples from the feature table
qiime feature-table filter-samples \
--i-table feature-table_raw.qza \
--m-metadata-file metadata.tsv \
--o-filtered-table feature-table.qza
# transform to biom file to get the sample depth
unzip -jo feature-table.qza '*/data/feature-table.biom' -d ./
biom summarize-table -i feature-table.biom
read -p "Please choose the rarefraction depth: " DEPTH
# import tree file
qiime tools import \
--input-path tree.nwk \
--output-path rooted-tree.qza \
--type 'Phylogeny[Rooted]'
# import taxonomy file to qiime2
cut -f 1,2 taxonomy_raw.tsv > taxonomy.tsv
sed -i '1d' taxonomy.tsv
qiime tools import \
--input-path taxonomy.tsv \
--input-format HeaderlessTSVTaxonomyFormat \
--output-path taxonomy.qza \
--type 'FeatureData[Taxonomy]'
###########################################################################################################################
# summarize the feature table
mkdir Overview
qiime feature-table summarize \
--i-table feature-table.qza \
--m-sample-metadata-file metadata.tsv \
--o-visualization Overview/feature_table_summary.qzv
# rarifraction curves
qiime diversity alpha-rarefaction \
--i-table feature-table.qza \
--p-max-depth $DEPTH \
--p-steps 20 \
--i-phylogeny rooted-tree.qza \
--m-metadata-file metadata.tsv \
--o-visualization Overview/rarefaction_curves.qzv
# generate the taxonomy boxplots for each sample
mkdir Tax_Bin
qiime taxa barplot \
--i-table feature-table.qza \
--i-taxonomy taxonomy.qza \
--m-metadata-file metadata.tsv \
--o-visualization Tax_Bin/taxa_barplot_eachSample.qzv
# generate the taxonomy boxplots for each group
# each category shall have one separated feature table
qiime feature-table group \
--i-table feature-table.qza \
--p-axis sample \
--p-mode sum \
--m-metadata-file metadata.tsv \
--m-metadata-column "$GROUP" \
--o-grouped-table feature-table_Group.qza
# generate the respective tax barplot
qiime taxa barplot \
--i-table feature-table_Group.qza \
--i-taxonomy taxonomy.qza \
--m-metadata-file metadata_Group.tsv \
--o-visualization Tax_Bin/taxa_barplot_Group.qzv
#################################################################################################################################
# generate the diversity core-metrics-phylogenetic
# alpha and beta diversity
qiime diversity core-metrics-phylogenetic \
--i-table feature-table.qza \
--i-phylogeny rooted-tree.qza \
--p-sampling-depth $DEPTH \
--m-metadata-file metadata.tsv \
--p-n-jobs-or-threads $JOBS \
--output-dir diversity
# 2D diversity PCoA (already wrapped in the "Emporer" visualization file)
# significance test
# alpha diversity test
for alpha in ./diversity/*_vector.qza
do
alphaname=$(sed "-es/_vector.qza//" <<< $alpha)
qiime diversity alpha-group-significance \
--i-alpha-diversity $alpha \
--m-metadata-file metadata.tsv \
--o-visualization ${alphaname}_compare_groups.qzv
done
# beta diversity permutation test
for beta in ./diversity/*_distance_matrix.qza
do
betaname=$(sed "-es/_distance_matrix.qza//" <<< $beta)
qiime diversity beta-group-significance \
--i-distance-matrix $beta \
--m-metadata-file metadata.tsv \
--m-metadata-column "$GROUP" \
--p-pairwise \
--o-visualization ${betaname}_compare_groups.qzv
done
# Identify differentially abundant features with ANCOM ??? Personal use
# add the pseudocount
#qiime composition add-pseudocount \
# --i-table feature-table.qza \
# --o-composition-table feature-table_pseudocount.qza
# run ANCOM on feature table by category
#time qiime composition ancom \
# --i-table feature-table_pseudocount.qza \
# --m-metadata-file mapping_${A}.tsv \
# --m-metadata-column $Group \
# --output-dir ancom_output
# run ANCOM on upper level by category
# sumarize the feature table to different levels
mkdir ./ancom_output
for i in {2..7}
do
# collapse the feature table to level 2-7
qiime taxa collapse \
--i-table feature-table.qza \
--i-taxonomy taxonomy.qza \
--p-level $i \
--o-collapsed-table ./ancom_output/feature-table_l${i}.qza
# add pseudocount
qiime composition add-pseudocount \
--i-table ./ancom_output/feature-table_l${i}.qza \
--o-composition-table ./ancom_output/feature-table_l${i}_pseudocount.qza
# run ancom
qiime composition ancom \
--i-table ./ancom_output/feature-table_l${i}_pseudocount.qza \
--m-metadata-file metadata.tsv \
--m-metadata-column "$GROUP" \
--o-visualization ./ancom_output/ancom-Group_l${i}.qzv
done
conda deactivate
# add the file description
echo "
Qiime2 standard analysis, built with qiime2-2020.11.
Output directory defination: qiime2Ana_[rarefaction depth]_[mapping file suffix]-[Column header for Comparison],
eg: qiime2Ana_5000_demo-Group, qiime2 result for samples in mapping_demo.tsv comparison on Group column at the rarefaction depth of 5000.
1. File type
*.qza: Qiime2 input file.
*.qzv: Qiime2 visulization file, view with browser: https://view.qiime2.org/.
2. Directory structure
2.1. Overview
feature_table_summary.qzv -- overview the feature table, depth of each samples, etc.
rarefraction_curves.qzv -- rarefraction curves based Metadata.
2.2. Diversity
2.2.1. Alpha diversity
evenness|faith_pd|observed_otus|shannon_comparegroups.qzv
2.2.2. Beta diversity
jaccard|bray_curtis|weighted_unifrac|unweighted_unifrac_emperor.qzv
jaccard|bray_curtis|weighted_unifrac|unweighted_unifrac_compare_groups.qzv
*.qza -- Required Qiime2 input files
2.3. Tax_Bin
taxa_barplot_eachSample.qzv -- relative abundance bar plot with each samples.
taxa_barplot_Group.qzv -- grouped relative abundance bar plot.
2.4. ancom_output
ancom-Group_l{2..7}.qzv -- ANCOM test across Group on the taxonomy level 2,3,4,5,6,7.
*.qza -- Required Qiime2 input files.
Please check the Qiim2(https://qiime2.org/) if more questions exist.
Thanks.
Yan Hui
" > ./Readme.txt
####################################################################################################################################################
##################################################Intermediate File Cleaning AND Output Renaming####################################################
tput setaf 4
echo ""
read -p ">>>>>>Would you like to remove intermediate products? y/n: " CHOICE
if [ "$CHOICE" = "y" ]; then
rm -f *qza *tree zOTU* *biom
elif [ "$CHOICE" == "n" ]; then
echo " Yep. Temporary files will be kept"
fi
tput sgr0
# Rename output
mv ../${QIIME2ANA} ../qiime2Ana_${DEPTH}_${A}-${GROUP}
exit