Skip to content

Commit a0b2b2d

Browse files
authored
Merge pull request NVIDIA#1506 from nwnt/nwnt/quote-bash-variables
Apply shellcheck improvements to must-gather.sh
2 parents: 0c78427 + 4fd640b — commit a0b2b2d

File tree

1 file changed: +67 / -68 lines changed

1 file changed: +67 / -68 lines changed

hack/must-gather.sh

Lines changed: 67 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -20,27 +20,27 @@ else
2020
if [ -z "${ARTIFACT_DIR:-}" ]; then
2121
export ARTIFACT_DIR="/tmp/nvidia-gpu-operator_$(date +%Y%m%d_%H%M)"
2222
fi
23-
echo "Using ARTIFACT_DIR=$ARTIFACT_DIR"
23+
echo "Using ARTIFACT_DIR=${ARTIFACT_DIR}"
2424
fi
2525

26-
mkdir -p "$ARTIFACT_DIR"
26+
mkdir -p "${ARTIFACT_DIR}"
2727

2828
echo
2929

30-
exec 1> >(tee $ARTIFACT_DIR/must-gather.log)
31-
exec 2> $ARTIFACT_DIR/must-gather.stderr.log
30+
exec 1> >(tee "${ARTIFACT_DIR}/must-gather.log")
31+
exec 2> "${ARTIFACT_DIR}/must-gather.stderr.log"
3232

3333
if [[ "$0" == "/usr/bin/gather" ]]; then
34-
echo "NVIDIA GPU Operator" > $ARTIFACT_DIR/version
35-
echo "${VERSION:-N/A}" >> $ARTIFACT_DIR/version
34+
echo "NVIDIA GPU Operator" > "${ARTIFACT_DIR}/version"
35+
echo "${VERSION:-N/A}" >> "${ARTIFACT_DIR}/version"
3636
fi
3737

3838
ocp_cluster=$($K get clusterversion/version --ignore-not-found -oname || true)
3939

4040
if [[ "$ocp_cluster" ]]; then
4141
echo "Running in OpenShift."
4242
echo "Get the cluster version"
43-
$K get clusterversion/version -oyaml > $ARTIFACT_DIR/openshift_version.yaml
43+
$K get clusterversion/version -oyaml > "${ARTIFACT_DIR}/openshift_version.yaml"
4444
fi
4545

4646
echo
@@ -66,7 +66,7 @@ if [ -z "$OPERATOR_POD_NAME" ]; then
6666
exit 1
6767
fi
6868

69-
OPERATOR_NAMESPACE=$($K get pods -lapp=gpu-operator -A -ojsonpath={.items[].metadata.namespace} --ignore-not-found)
69+
OPERATOR_NAMESPACE=$($K get pods -lapp=gpu-operator -A -ojsonpath='{.items[].metadata.namespace}' --ignore-not-found)
7070

7171
echo "Using '$OPERATOR_NAMESPACE' as operator namespace"
7272
echo ""
@@ -93,12 +93,12 @@ echo
9393

9494
CLUSTER_POLICY_NAME=$($K get clusterpolicy -oname)
9595

96-
if [[ "$CLUSTER_POLICY_NAME" ]]; then
97-
echo "Get $CLUSTER_POLICY_NAME"
98-
$K get -oyaml $CLUSTER_POLICY_NAME > $ARTIFACT_DIR/cluster_policy.yaml
96+
if [[ "${CLUSTER_POLICY_NAME}" ]]; then
97+
echo "Get ${CLUSTER_POLICY_NAME}"
98+
$K get -oyaml "${CLUSTER_POLICY_NAME}" > "${ARTIFACT_DIR}/cluster_policy.yaml"
9999
else
100100
echo "Mark the ClusterPolicy as missing"
101-
touch $ARTIFACT_DIR/cluster_policy.missing
101+
touch "${ARTIFACT_DIR}/cluster_policy.missing"
102102
fi
103103

104104
echo
@@ -109,15 +109,15 @@ echo
109109

110110
if [ "$ocp_cluster" ]; then
111111
echo "Get all the machines"
112-
$K get machines -A > $ARTIFACT_DIR/all_machines.list
112+
$K get machines -A > "${ARTIFACT_DIR}/all_machines.list"
113113
fi
114114

115115
echo "Get the labels of the nodes with NVIDIA PCI cards"
116116

117117
GPU_PCI_LABELS=(feature.node.kubernetes.io/pci-10de.present feature.node.kubernetes.io/pci-0302_10de.present feature.node.kubernetes.io/pci-0300_10de.present)
118118

119119
gpu_pci_nodes=""
120-
for label in ${GPU_PCI_LABELS[@]}; do
120+
for label in "${GPU_PCI_LABELS[@]}"; do
121121
gpu_pci_nodes="$gpu_pci_nodes $($K get nodes -l$label -oname)"
122122
done
123123

@@ -127,23 +127,23 @@ if [ -z "$gpu_pci_nodes" ]; then
127127
fi
128128

129129
for node in $(echo "$gpu_pci_nodes"); do
130-
echo "$node" | cut -d/ -f2 >> $ARTIFACT_DIR/gpu_nodes.labels
131-
$K get $node '-ojsonpath={.metadata.labels}' \
130+
echo "${node}" | cut -d/ -f2 >> "${ARTIFACT_DIR}/gpu_nodes.labels"
131+
$K get "${node}" '-ojsonpath={.metadata.labels}' \
132132
| sed 's|,|,- |g' \
133133
| tr ',' '\n' \
134134
| sed 's/{"/- /' \
135135
| tr : = \
136136
| sed 's/"//g' \
137137
| sed 's/}/\n/' \
138-
>> $ARTIFACT_DIR/gpu_nodes.labels
139-
echo "" >> $ARTIFACT_DIR/gpu_nodes.labels
138+
>> "${ARTIFACT_DIR}/gpu_nodes.labels"
139+
echo "" >> "${ARTIFACT_DIR}/gpu_nodes.labels"
140140
done
141141

142142
echo "Get the GPU nodes (status)"
143-
$K get nodes -l nvidia.com/gpu.present=true -o wide > $ARTIFACT_DIR/gpu_nodes.status
143+
$K get nodes -l nvidia.com/gpu.present=true -o wide > "${ARTIFACT_DIR}/gpu_nodes.status"
144144

145145
echo "Get the GPU nodes (description)"
146-
$K describe nodes -l nvidia.com/gpu.present=true > $ARTIFACT_DIR/gpu_nodes.descr
146+
$K describe nodes -l nvidia.com/gpu.present=true > "${ARTIFACT_DIR}/gpu_nodes.descr"
147147

148148
echo ""
149149
echo "#"
@@ -152,77 +152,77 @@ echo "#"
152152
echo
153153

154154
echo "Get the GPU Operator Pod (status)"
155-
$K get $OPERATOR_POD_NAME \
155+
$K get "${OPERATOR_POD_NAME}" \
156156
-owide \
157-
-n $OPERATOR_NAMESPACE \
158-
> $ARTIFACT_DIR/gpu_operator_pod.status
157+
-n "${OPERATOR_NAMESPACE}" \
158+
> "${ARTIFACT_DIR}/gpu_operator_pod.status"
159159

160160
echo "Get the GPU Operator Pod (yaml)"
161-
$K get $OPERATOR_POD_NAME \
161+
$K get "${OPERATOR_POD_NAME}" \
162162
-oyaml \
163-
-n $OPERATOR_NAMESPACE \
164-
> $ARTIFACT_DIR/gpu_operator_pod.yaml
163+
-n "${OPERATOR_NAMESPACE}" \
164+
> "${ARTIFACT_DIR}/gpu_operator_pod.yaml"
165165

166166
echo "Get the GPU Operator Pod logs"
167-
$K logs $OPERATOR_POD_NAME \
168-
-n $OPERATOR_NAMESPACE \
169-
> "$ARTIFACT_DIR/gpu_operator_pod.log"
167+
$K logs "${OPERATOR_POD_NAME}" \
168+
-n "${OPERATOR_NAMESPACE}" \
169+
> "${ARTIFACT_DIR}/gpu_operator_pod.log"
170170

171-
$K logs $OPERATOR_POD_NAME \
172-
-n $OPERATOR_NAMESPACE \
171+
$K logs "${OPERATOR_POD_NAME}" \
172+
-n "${OPERATOR_NAMESPACE}" \
173173
--previous \
174-
> "$ARTIFACT_DIR/gpu_operator_pod.previous.log"
174+
> "${ARTIFACT_DIR}/gpu_operator_pod.previous.log"
175175

176176
echo ""
177177
echo "#"
178178
echo "# Operand Pods"
179179
echo "#"
180180
echo ""
181181

182-
echo "Get the Pods in $OPERATOR_NAMESPACE (status)"
182+
echo "Get the Pods in ${OPERATOR_NAMESPACE} (status)"
183183
$K get pods -owide \
184-
-n $OPERATOR_NAMESPACE \
185-
> $ARTIFACT_DIR/gpu_operand_pods.status
184+
-n "${OPERATOR_NAMESPACE}" \
185+
> "${ARTIFACT_DIR}/gpu_operand_pods.status"
186186

187-
echo "Get the Pods in $OPERATOR_NAMESPACE (yaml)"
187+
echo "Get the Pods in ${OPERATOR_NAMESPACE} (yaml)"
188188
$K get pods -oyaml \
189-
-n $OPERATOR_NAMESPACE \
190-
> $ARTIFACT_DIR/gpu_operand_pods.yaml
189+
-n "${OPERATOR_NAMESPACE}" \
190+
> "${ARTIFACT_DIR}/gpu_operand_pods.yaml"
191191

192192
echo "Get the GPU Operator Pods Images"
193-
$K get pods -n $OPERATOR_NAMESPACE \
193+
$K get pods -n "${OPERATOR_NAMESPACE}" \
194194
-o=jsonpath='{range .items[*]}{"\n"}{.metadata.name}{":\t"}{range .spec.containers[*]}{.image}{" "}{end}{end}' \
195-
> $ARTIFACT_DIR/gpu_operand_pod_images.txt
195+
> "${ARTIFACT_DIR}/gpu_operand_pod_images.txt"
196196

197197
echo "Get the description and logs of the GPU Operator Pods"
198198

199-
for pod in $($K get pods -n $OPERATOR_NAMESPACE -oname);
199+
for pod in $($K get pods -n "${OPERATOR_NAMESPACE}" -oname);
200200
do
201-
if ! $K get $pod -n $OPERATOR_NAMESPACE -ojsonpath={.metadata.labels} | egrep --quiet '(nvidia|gpu)'; then
201+
if ! $K get "${pod}" -n "${OPERATOR_NAMESPACE}" -ojsonpath='{.metadata.labels}' | grep -E --quiet '(nvidia|gpu)'; then
202202
echo "Skipping $pod, not a NVIDA/GPU Pod ..."
203203
continue
204204
fi
205205
pod_name=$(echo "$pod" | cut -d/ -f2)
206206

207-
if [ $pod == $OPERATOR_POD_NAME ]; then
207+
if [ "${pod}" == "${OPERATOR_POD_NAME}" ]; then
208208
echo "Skipping operator pod $pod_name ..."
209209
continue
210210
fi
211211

212-
$K logs $pod \
213-
-n $OPERATOR_NAMESPACE \
212+
$K logs "${pod}" \
213+
-n "${OPERATOR_NAMESPACE}" \
214214
--all-containers --prefix \
215-
> $ARTIFACT_DIR/gpu_operand_pod_$pod_name.log
215+
> "${ARTIFACT_DIR}/gpu_operand_pod_$pod_name.log"
216216

217-
$K logs $pod \
218-
-n $OPERATOR_NAMESPACE \
217+
$K logs "${pod}" \
218+
-n "${OPERATOR_NAMESPACE}" \
219219
--all-containers --prefix \
220220
--previous \
221-
> $ARTIFACT_DIR/gpu_operand_pod_$pod_name.previous.log
221+
> "${ARTIFACT_DIR}/gpu_operand_pod_$pod_name.previous.log"
222222

223-
$K describe $pod \
224-
-n $OPERATOR_NAMESPACE \
225-
> $ARTIFACT_DIR/gpu_operand_pod_$pod_name.descr
223+
$K describe "${pod}" \
224+
-n "${OPERATOR_NAMESPACE}" \
225+
> "${ARTIFACT_DIR}/gpu_operand_pod_$pod_name.descr"
226226
done
227227

228228
echo ""
@@ -234,27 +234,26 @@ echo ""
234234
echo "Get the DaemonSets in $OPERATOR_NAMESPACE (status)"
235235

236236
$K get ds \
237-
-n $OPERATOR_NAMESPACE \
238-
> $ARTIFACT_DIR/gpu_operand_ds.status
239-
237+
-n "${OPERATOR_NAMESPACE}" \
238+
> "${ARTIFACT_DIR}/gpu_operand_ds.status"
240239

241240
echo "Get the DaemonSets in $OPERATOR_NAMESPACE (yaml)"
242241

243242
$K get ds -oyaml \
244-
-n $OPERATOR_NAMESPACE \
245-
> $ARTIFACT_DIR/gpu_operand_ds.yaml
243+
-n "${OPERATOR_NAMESPACE}" \
244+
> "${ARTIFACT_DIR}/gpu_operand_ds.yaml"
246245

247246
echo "Get the description of the GPU Operator DaemonSets"
248247

249-
for ds in $($K get ds -n $OPERATOR_NAMESPACE -oname);
248+
for ds in $($K get ds -n "${OPERATOR_NAMESPACE}" -oname);
250249
do
251-
if ! $K get $ds -n $OPERATOR_NAMESPACE -ojsonpath={.metadata.labels} | egrep --quiet '(nvidia|gpu)'; then
252-
echo "Skipping $ds, not a NVIDA/GPU DaemonSet ..."
250+
if ! $K get "${ds}" -n "${OPERATOR_NAMESPACE}" -ojsonpath='{.metadata.labels}' | grep -E --quiet '(nvidia|gpu)'; then
251+
echo "Skipping ${ds}, not a NVIDA/GPU DaemonSet ..."
253252
continue
254253
fi
255-
$K describe $ds \
256-
-n $OPERATOR_NAMESPACE \
257-
> $ARTIFACT_DIR/gpu_operand_ds_$(echo "$ds" | cut -d/ -f2).descr
254+
$K describe "${ds}" \
255+
-n "${OPERATOR_NAMESPACE}" \
256+
> "${ARTIFACT_DIR}/gpu_operand_ds_$(echo "$ds" | cut -d/ -f2).descr"
258257
done
259258

260259
echo ""
@@ -263,18 +262,18 @@ echo "# nvidia-bug-report.sh"
263262
echo "#"
264263
echo ""
265264

266-
for pod in $($K get pods -lopenshift.driver-toolkit -oname -n $OPERATOR_NAMESPACE; $K get pods -lapp=nvidia-driver-daemonset -oname -n $OPERATOR_NAMESPACE; $K get pods -lapp=nvidia-vgpu-manager-daemonset -oname -n $OPERATOR_NAMESPACE);
265+
for pod in $($K get pods -lopenshift.driver-toolkit -oname -n "${OPERATOR_NAMESPACE}"; $K get pods -lapp=nvidia-driver-daemonset -oname -n "${OPERATOR_NAMESPACE}"; $K get pods -lapp=nvidia-vgpu-manager-daemonset -oname -n "${OPERATOR_NAMESPACE}");
267266
do
268-
pod_nodename=$($K get $pod -ojsonpath={.spec.nodeName} -n $OPERATOR_NAMESPACE)
267+
pod_nodename=$($K get "${pod}" -ojsonpath={.spec.nodeName} -n "${OPERATOR_NAMESPACE}")
269268
echo "Saving nvidia-bug-report from ${pod_nodename} ..."
270269

271-
$K exec -n $OPERATOR_NAMESPACE $pod -- bash -c 'cd /tmp && nvidia-bug-report.sh' >&2 || \
270+
$K exec -n "${OPERATOR_NAMESPACE}" "${pod}" -- bash -c 'cd /tmp && nvidia-bug-report.sh' >&2 || \
272271
(echo "Failed to collect nvidia-bug-report from ${pod_nodename}" && continue)
273272

274-
$K cp $OPERATOR_NAMESPACE/$(basename $pod):/tmp/nvidia-bug-report.log.gz /tmp/nvidia-bug-report.log.gz || \
273+
$K cp "${OPERATOR_NAMESPACE}"/$(basename "${pod}"):/tmp/nvidia-bug-report.log.gz /tmp/nvidia-bug-report.log.gz || \
275274
(echo "Failed to save nvidia-bug-report from ${pod_nodename}" && continue)
276275

277-
mv /tmp/nvidia-bug-report.log.gz $ARTIFACT_DIR/nvidia-bug-report_${pod_nodename}.log.gz
276+
mv /tmp/nvidia-bug-report.log.gz "${ARTIFACT_DIR}/nvidia-bug-report_${pod_nodename}.log.gz"
278277
done
279278

280279
echo ""

0 commit comments

Comments (0)