import os
import shlex
import subprocess
+import textwrap
+import sys
+import threading
+from datetime import datetime, timezone
from pathlib import Path
+from time import perf_counter
from typing import Iterable


ITERATIONS = int(os.environ.get("ITERATIONS", "1"))
BRAINTRUST_UPLOADER = BraintrustUploader(os.environ)

+
+def _log(message: str) -> None:
+    """Emit a timestamped log line that pytest `-s` will surface immediately."""
+    timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
+    print(f"[{timestamp}] {message}", flush=True)
+
+
+def _summarise_command(parts: Iterable[str]) -> str:
+    """Return a shell-style command string for debugging output."""
+    sequence = parts if isinstance(parts, list) else list(parts)
+    if hasattr(shlex, "join"):
+        return shlex.join(sequence)
+    # ``shlex.join`` was added in Python 3.8; keep a safe fallback just in case.
+    return " ".join(shlex.quote(part) for part in sequence)
+
+
+def _preview_output(output: str, *, limit: int = 400) -> str:
+    """Provide a trimmed preview of command output for quick debugging."""
+    return textwrap.shorten(output.strip(), width=limit, placeholder=" …")
+
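For a sense of what the two pure helpers above return, here is a small standalone sketch; the command list and the preview width are made up for illustration, and `shlex.join` / `textwrap.shorten` are the same standard-library calls the helpers wrap:

```python
# Illustrative only: hypothetical command parts, not the real CLI invocation.
import shlex
import textwrap

parts = ["aks-agent", "--prompt", "why is my pod pending?"]
# shlex.join quotes parts containing spaces so the line can be pasted into a shell.
print(shlex.join(parts))  # aks-agent --prompt 'why is my pod pending?'

noisy = "very long diagnostic output " * 50
# textwrap.shorten collapses whitespace and trims to the width, appending the placeholder.
print(textwrap.shorten(noisy.strip(), width=60, placeholder=" …"))
```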
pytestmark = [
    pytest.mark.skipif(
        not RUN_LIVE,
@@ -90,14 +115,59 @@ def _build_command(prompt: str, model: str, resource_group: str, cluster_name: s


def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
+    command_list = list(command)
+    command_display = _summarise_command(command_list)
+    _log(f"Invoking AKS Agent CLI: {command_display}")
+    start = perf_counter()
+
+    timeout_seconds = 600  # 10 minutes timeout
+
    try:
-        result = subprocess.run(  # noqa: S603
-            list(command),
-            check=True,
-            capture_output=True,
+        # Use Popen for real-time output visibility
+        process = subprocess.Popen(  # noqa: S603
+            command_list,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
            text=True,
            env=env,
        )
+
+        # Thread to print stderr in real-time
+        stderr_lines = []
+        def print_stderr():
+            for line in iter(process.stderr.readline, ''):
+                if line:
+                    print(f"[STDERR] {line.rstrip()}", file=sys.stderr, flush=True)
+                    stderr_lines.append(line)
+
+        stderr_thread = threading.Thread(target=print_stderr, daemon=True)
+        stderr_thread.start()
+
+        # Wait with timeout
+        try:
+            stdout, _ = process.communicate(timeout=timeout_seconds)
+            stderr_thread.join(timeout=1)
+            stderr = ''.join(stderr_lines)
+        except subprocess.TimeoutExpired:
+            process.kill()
+            stdout, stderr_remainder = process.communicate()
+            stderr = ''.join(stderr_lines) + (stderr_remainder or '')
+            _log(f"[ERROR] CLI command timed out after {timeout_seconds}s")
+            pytest.fail(
+                f"AKS Agent CLI call timed out after {timeout_seconds}s\n"
+                f"Command: {command_display}\n"
+                f"Stdout: {stdout}\n"
+                f"Stderr: {stderr}"
+            )
+
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(
+                process.returncode, command_list, stdout, stderr
+            )
+
+        result = subprocess.CompletedProcess(
+            command_list, process.returncode, stdout, stderr
+        )
    except subprocess.CalledProcessError as exc:  # pragma: no cover - live failure path
        output = exc.stdout or ""
        stderr = exc.stderr or ""
@@ -109,13 +179,28 @@ def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
            f"Stdout: {output}\n"
            f"Stderr: {stderr}"
        )
+    duration = perf_counter() - start
+    stdout_preview = _preview_output(result.stdout)
+    stderr_preview = _preview_output(result.stderr) if result.stderr else None
+    _log(
+        f"AKS Agent CLI completed in {duration:.1f}s with stdout preview: {stdout_preview}"
+    )
+    if stderr_preview:
+        _log(
+            f"AKS Agent CLI stderr preview: {stderr_preview}"
+        )
    return result.stdout


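The control flow added to `_run_cli` above boils down to one pattern: stream stderr from a daemon thread while the main thread waits on `communicate()` with a timeout, then convert a non-zero exit into `CalledProcessError`. A minimal sketch of that pattern follows, using an arbitrary Python one-liner as the child process; names such as `pump_stderr` are illustrative and not part of the test module:

```python
# Sketch of the Popen + reader-thread + timeout pattern used by _run_cli.
import subprocess
import sys
import threading

proc = subprocess.Popen(
    [sys.executable, "-c", "import sys; print('ok'); print('warn', file=sys.stderr)"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

stderr_lines: list[str] = []

def pump_stderr() -> None:
    # Echo each stderr line as soon as it arrives, and keep a copy for later.
    for line in iter(proc.stderr.readline, ""):
        print(f"[STDERR] {line.rstrip()}", file=sys.stderr, flush=True)
        stderr_lines.append(line)

threading.Thread(target=pump_stderr, daemon=True).start()

try:
    stdout, _ = proc.communicate(timeout=30)  # stderr is handled by the thread above
except subprocess.TimeoutExpired:
    proc.kill()
    stdout, _ = proc.communicate()

if proc.returncode != 0:
    raise subprocess.CalledProcessError(
        proc.returncode, proc.args, stdout, "".join(stderr_lines)
    )
print(stdout.strip(), "".join(stderr_lines).strip())
```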
def _run_commands(
    commands: list[str], env: dict[str, str], label: str, scenario: Scenario
) -> None:
+    if not commands:
+        _log(f"[{label}] {scenario.name}: no commands to run")
+        return
    for cmd in commands:
+        _log(f"[{label}] {scenario.name}: running shell command: {cmd}")
+        start = perf_counter()
        try:
            completed = subprocess.run(  # noqa: S603
                cmd,
@@ -137,9 +222,25 @@ def _run_commands(
                f"Stderr: {stderr}"
            )
        else:
+            duration = perf_counter() - start
            # Provide quick visibility into command results when debugging failures.
            if completed.stdout:
-                print(f"[{label}] {scenario.name}: {completed.stdout.strip()}")
+                stdout_preview = _preview_output(completed.stdout)
+                _log(
+                    f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; stdout preview: {stdout_preview}"
+                )
+            else:
+                _log(
+                    f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; no stdout produced"
+                )
+            if completed.stderr:
+                stderr_preview = _preview_output(completed.stderr)
+                _log(
+                    f"[{label}] {scenario.name}: stderr preview: {stderr_preview}"
+                )
+    _log(
+        f"[{label}] {scenario.name}: completed {len(commands)} command(s)"
+    )


def _scenario_params() -> list:
@@ -165,6 +266,7 @@ def test_ask_agent_live(
    request: pytest.FixtureRequest,
) -> None:
    iteration_label = f"[iteration {iteration + 1}/{ITERATIONS}]"
+    _log(f"{iteration_label} starting scenario {scenario.name}")
    if RUN_LIVE:
        env = _load_env()

@@ -178,7 +280,7 @@ def test_ask_agent_live(
        env.update(scenario.env_overrides)

        if iteration == 0 and scenario.before_commands and not aks_skip_setup:
-            print(f"{iteration_label} running setup commands for {scenario.name}")
+            _log(f"{iteration_label} running setup commands for {scenario.name}")
            _run_commands(scenario.before_commands, env, "setup", scenario)

        command = _build_command(
@@ -188,7 +290,7 @@ def test_ask_agent_live(
            cluster_name=cluster_name,
        )

-        print(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
+        _log(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
        try:
            raw_output = _run_cli(command, env)
            answer = ""
@@ -216,11 +318,11 @@ def test_ask_agent_live(
                classifier_rationale = classifier_result.metadata.get(
                    "rationale", ""
                )
-                print(
+                _log(
                    f"{iteration_label} classifier score for {scenario.name}: {classifier_score}"
                )
                if classifier_score is None:
-                    print(
+                    _log(
                        f"{iteration_label} classifier returned no score for {scenario.name}; falling back to substring checks"
                    )
                else:
@@ -230,7 +332,7 @@ def test_ask_agent_live(
                    if not error_message:
                        error_message = "Classifier judged answer incorrect"
            else:
-                print(
+                _log(
                    f"{iteration_label} classifier unavailable for {scenario.name}; falling back to substring checks"
                )

@@ -280,21 +382,21 @@ def test_ask_agent_live(

            if GENERATE_MOCKS:
                mock_path = save_mock_answer(scenario.mock_path, answer)
-                print(f"{iteration_label} [mock] wrote response to {mock_path}")
+                _log(f"{iteration_label} [mock] wrote response to {mock_path}")
        finally:
            if (
                iteration == ITERATIONS - 1
                and scenario.after_commands
                and not aks_skip_cleanup
            ):
-                print(f"{iteration_label} running cleanup commands for {scenario.name}")
+                _log(f"{iteration_label} running cleanup commands for {scenario.name}")
                _run_commands(scenario.after_commands, env, "cleanup", scenario)
    else:
        if GENERATE_MOCKS:
            pytest.fail("GENERATE_MOCKS requires RUN_LIVE=true")
        try:
            answer = load_mock_answer(scenario.mock_path)
-            print(f"{iteration_label} replayed mock response for {scenario.name}")
+            _log(f"{iteration_label} replayed mock response for {scenario.name}")
        except FileNotFoundError:
            pytest.skip(f"Mock response missing for scenario {scenario.name}; rerun with RUN_LIVE=true GENERATE_MOCKS=true")

@@ -328,5 +430,6 @@ def test_ask_agent_live(
        _set_user_property(request, 'braintrust_root_span_id', str(root_span_id))
    if url:
        _set_user_property(request, 'braintrust_experiment_url', str(url))
+    _log(f"{iteration_label} completed scenario {scenario.name} (passed={passed})")
    if not passed:
        pytest.fail(f"Scenario {scenario.name}: {error}\nAI answer:\n{answer}")