Skip to content

Commit ec701f6

Browse files
authored
run_operator_locally.sh: more retrying, debuggability (zalando#2218)
actually retry kubectl port-forward and better messages
1 parent 1e64ae7 commit ec701f6

File tree

1 file changed

+58
-6
lines changed

1 file changed

+58
-6
lines changed

run_operator_locally.sh

+58 -6
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,8 @@ function retry(){
3030
local -r retry_cmd="$1"
3131
local -r retry_msg="$2"
3232

33-
# times out after 1 minute
34-
for i in {1..20}; do
33+
# Time out after three minutes.
34+
for i in {1..60}; do
3535
if eval "$retry_cmd"; then
3636
return 0
3737
fi
@@ -165,11 +165,63 @@ function forward_ports(){
165165
local operator_pod
166166
operator_pod=$(kubectl get pod -l name=postgres-operator -o jsonpath={.items..metadata.name})
167167

168-
# runs in the background to keep current terminal responsive
169-
# stdout redirect removes the info message about forwarded ports; the message sometimes garbles the cli prompt
170-
kubectl port-forward "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" &> /dev/null &
168+
# Spawn `kubectl port-forward` in the background to keep current terminal
169+
# responsive. Hide stdout because otherwise there is a note about each TCP
170+
# connection. Do not hide stderr so port-forward setup errors can be
171+
# debugged. Sometimes the port-forward setup fails because expected k8s
172+
# state isn't achieved yet. Try to detect that case and then run the
173+
# command again (in a finite loop).
174+
for _attempt in {1..20}; do
175+
# Delay between retry attempts. First attempt should already be
176+
# delayed.
177+
echo "soon: invoke kubectl port-forward command (attempt $_attempt)"
178+
sleep 5
179+
180+
# With the --pod-running-timeout=4s argument the process is expected
181+
# to terminate within about that time if the pod isn't ready yet.
182+
kubectl port-forward --pod-running-timeout=4s "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" 1> /dev/null &
183+
_kubectl_pid=$!
184+
_pf_success=true
185+
186+
# A successful `kubectl port-forward` setup can pragmatically be
187+
# detected with a time-based criterion: it is a long-running process if
188+
# successfully set up. If it does not terminate within deadline then
189+
# consider the setup successful. Overall, observe the process for
190+
# roughly 7 seconds. If it terminates before that it's certainly an
191+
# error. If it did not terminate within that time frame then consider
192+
# setup successful.
193+
for ib in {1..7}; do
194+
sleep 1
195+
# Portable and non-blocking test: is process still running?
196+
if kill -s 0 -- "${_kubectl_pid}" >/dev/null 2>&1; then
197+
echo "port-forward process is still running"
198+
else
199+
# port-forward process seems to have terminated, reap zombie
200+
set +e
201+
# `wait` is now expected to be non-blocking, and exits with the
202+
# exit code of pid (first arg).
203+
wait $_kubectl_pid
204+
_kubectl_rc=$?
205+
set -e
206+
echo "port-forward process terminated with exit code ${_kubectl_rc}"
207+
_pf_success=false
208+
break
209+
fi
210+
done
211+
212+
if [ ${_pf_success} = true ]; then
213+
echo "port-forward setup seems successful. leave retry loop."
214+
break
215+
fi
216+
217+
done
218+
219+
if [ "${_pf_success}" = false ]; then
220+
echo "port-forward setup failed after retrying. exit."
221+
exit 1
222+
fi
171223

172-
echo $! > "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
224+
echo "${_kubectl_pid}" > "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
173225
}
174226

175227

0 commit comments

Comments
 (0)