Skip to content

Commit 89c30aa

Browse files
Merge pull request #7 from trailofbits/fix-startup-timeout
Use readiness pipe for idalib startup
2 parents c1c7e54 + 07c925c commit 89c30aa

3 files changed

Lines changed: 268 additions & 67 deletions

File tree

src/idac/transport/idalib.py

Lines changed: 83 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import contextlib
44
import json
5+
import os
6+
import selectors
57
import socket
68
import subprocess
79
import sys
@@ -22,8 +24,7 @@
2224
from .schema import RequestEnvelope, response_ok
2325

2426
IDALIB_CONNECT_RETRIES = 3
25-
IDALIB_STARTUP_PROBE_TIMEOUT = 1.0
26-
IDALIB_STARTUP_TIMEOUT = 10.0
27+
IDALIB_READY_MAX_BYTES = 65_536
2728

2829

2930
@dataclass(frozen=True)
@@ -163,10 +164,45 @@ def _build_target_row(instance: IdaLibInstance) -> dict[str, Any]:
163164
)
164165

165166

167+
def _terminate_process(proc: subprocess.Popen[str]) -> None:
168+
with contextlib.suppress(ProcessLookupError):
169+
proc.terminate()
170+
try:
171+
proc.wait(timeout=5.0)
172+
except subprocess.TimeoutExpired:
173+
with contextlib.suppress(ProcessLookupError):
174+
proc.kill()
175+
with contextlib.suppress(subprocess.TimeoutExpired):
176+
proc.wait(timeout=5.0)
177+
178+
179+
def _read_ready_payload(read_fd: int, *, timeout: float | None) -> dict[str, Any]:
180+
try:
181+
with selectors.DefaultSelector() as selector:
182+
selector.register(read_fd, selectors.EVENT_READ)
183+
events = selector.select(timeout)
184+
if not events:
185+
raise socket.timeout()
186+
raw = os.read(read_fd, IDALIB_READY_MAX_BYTES + 1)
187+
finally:
188+
with contextlib.suppress(OSError):
189+
os.close(read_fd)
190+
191+
if not raw:
192+
raise EOFError("idalib daemon exited before reporting readiness")
193+
if len(raw) > IDALIB_READY_MAX_BYTES:
194+
raise RuntimeError("idalib daemon readiness payload is too large")
195+
raw = raw.split(b"\n", 1)[0].strip()
196+
payload = json.loads(raw.decode("utf-8"))
197+
if not isinstance(payload, dict):
198+
raise RuntimeError("idalib daemon returned a non-object readiness payload")
199+
return payload
200+
201+
166202
def _start_daemon_for_database(
167203
database_path: str,
168204
*,
169-
probe_timeout: float,
205+
startup_timeout: float | None,
170206
run_auto_analysis: bool,
171207
) -> IdaLibInstance:
172208
ensure_user_runtime_dir()
@@ -179,40 +215,53 @@ def _start_daemon_for_database(
179215
]
180216
if not run_auto_analysis:
181217
cmd.append("--no-auto-analysis")
218+
read_fd, write_fd = os.pipe()
219+
os.set_inheritable(write_fd, True)
220+
cmd.extend(["--ready-fd", str(write_fd)])
182221
with tempfile.TemporaryFile("w+", encoding="utf-8") as stderr_log:
183-
proc = subprocess.Popen(
184-
cmd,
185-
stdin=subprocess.DEVNULL,
186-
stdout=subprocess.DEVNULL,
187-
stderr=stderr_log,
188-
text=True,
189-
start_new_session=True,
190-
)
222+
try:
223+
proc = subprocess.Popen(
224+
cmd,
225+
stdin=subprocess.DEVNULL,
226+
stdout=subprocess.DEVNULL,
227+
stderr=stderr_log,
228+
text=True,
229+
start_new_session=True,
230+
pass_fds=(write_fd,),
231+
)
232+
except Exception:
233+
with contextlib.suppress(OSError):
234+
os.close(read_fd)
235+
raise
236+
finally:
237+
with contextlib.suppress(OSError):
238+
os.close(write_fd)
191239

192240
expected_registry = idalib_registry_path(proc.pid)
193-
deadline = time.monotonic() + IDALIB_STARTUP_TIMEOUT
194-
while time.monotonic() < deadline:
195-
if proc.poll() is not None:
196-
stderr_log.seek(0)
197-
detail = stderr_log.read().strip()
198-
if detail:
199-
raise RuntimeError(detail)
200-
raise RuntimeError(f"idalib daemon failed to start for `{database_path}`")
241+
try:
242+
payload = _read_ready_payload(read_fd, timeout=startup_timeout)
243+
if not bool(payload.get("ok")):
244+
raise RuntimeError(str(payload.get("error") or "idalib daemon failed to start"))
201245
instance = _instance_from_registry(expected_registry)
202-
if instance is not None and instance.database_path == database_path:
203-
try:
204-
if _probe_instance(
205-
instance,
206-
timeout=probe_timeout,
207-
purge_on_failure=False,
208-
):
209-
return instance
210-
except socket.timeout:
211-
pass
212-
time.sleep(0.05)
213-
with contextlib.suppress(ProcessLookupError):
214-
proc.terminate()
215-
raise RuntimeError(f"timed out waiting for idalib daemon to start for `{database_path}`")
246+
if instance is None:
247+
raise RuntimeError("idalib daemon reported readiness but registry was unavailable")
248+
if instance.database_path != database_path:
249+
raise RuntimeError(
250+
f"idalib daemon opened `{instance.database_path}` while `{database_path}` was requested"
251+
)
252+
return instance
253+
except socket.timeout as exc:
254+
_terminate_process(proc)
255+
raise RuntimeError(
256+
f"timed out after {_timeout_text(startup_timeout)} waiting for idalib daemon "
257+
f"to start for `{database_path}`"
258+
) from exc
259+
except EOFError as exc:
260+
stderr_log.seek(0)
261+
detail = stderr_log.read().strip()
262+
if detail:
263+
raise RuntimeError(detail) from exc
264+
raise RuntimeError(f"idalib daemon failed to start for `{database_path}`") from exc
216265

217266

218267
def _ensure_instance_for_database(
@@ -235,7 +284,7 @@ def _ensure_instance_for_database(
235284
return (
236285
_start_daemon_for_database(
237286
normalized,
238-
probe_timeout=IDALIB_STARTUP_PROBE_TIMEOUT,
287+
startup_timeout=timeout,
239288
run_auto_analysis=run_auto_analysis,
240289
),
241290
False,

src/idac/transport/idalib_server.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ def _format_open_error(path: str, rc: int) -> str:
5050
return f"failed to open database `{path}`: rc={rc}"
5151

5252

53+
def _write_ready(ready_fd: int | None, payload: dict[str, Any]) -> None:
54+
if ready_fd is None:
55+
return
56+
try:
57+
os.write(ready_fd, (json.dumps(payload) + "\n").encode("utf-8"))
58+
except OSError:
59+
pass
60+
finally:
61+
with contextlib.suppress(OSError):
62+
os.close(ready_fd)
63+
64+
5365
def _other_live_instance_for_database(database_path: str) -> dict[str, Any] | None:
5466
normalized = normalize_database_path(database_path)
5567
for registry_path in idalib_registry_paths():
@@ -245,35 +257,48 @@ class _UnixIdaLibServer(socketserver.UnixStreamServer):
245257
request_queue_size = 64
246258

247259

248-
def serve(*, database_path: str, run_auto_analysis: bool) -> int:
260+
def serve(*, database_path: str, run_auto_analysis: bool, ready_fd: int | None = None) -> int:
249261
ensure_user_runtime_dir()
250262
socket_path = idalib_socket_path(os.getpid())
251263
registry_path = idalib_registry_path(os.getpid())
252264
with contextlib.suppress(FileNotFoundError):
253265
socket_path.unlink()
254-
service = IdaLibService(
255-
database_path=database_path,
256-
run_auto_analysis=run_auto_analysis,
257-
)
258-
server = _UnixIdaLibServer(str(socket_path), _IdaLibRequestHandler)
259-
server.service = service # type: ignore[attr-defined]
260-
service._write_registry()
266+
service: IdaLibService | None = None
267+
server: _UnixIdaLibServer | None = None
261268
try:
269+
service = IdaLibService(
270+
database_path=database_path,
271+
run_auto_analysis=run_auto_analysis,
272+
)
273+
server = _UnixIdaLibServer(str(socket_path), _IdaLibRequestHandler)
274+
server.service = service # type: ignore[attr-defined]
275+
service._write_registry()
276+
_write_ready(ready_fd, {"ok": True})
277+
ready_fd = None
262278
server.serve_forever()
279+
except Exception as exc:
280+
_write_ready(ready_fd, {"ok": False, "error": str(exc)})
281+
raise
263282
finally:
264-
server.server_close()
283+
if server is not None:
284+
server.server_close()
265285
with contextlib.suppress(FileNotFoundError):
266286
socket_path.unlink()
267287
with contextlib.suppress(FileNotFoundError):
268288
registry_path.unlink()
269-
with contextlib.suppress(Exception):
270-
service.close_runtime(save=False)
289+
if service is not None:
290+
with contextlib.suppress(Exception):
291+
service.close_runtime(save=False)
292+
with contextlib.suppress(OSError):
293+
if ready_fd is not None:
294+
os.close(ready_fd)
271295
return 0
272296

273297

274298
def build_parser() -> argparse.ArgumentParser:
275299
parser = argparse.ArgumentParser(prog="python -m idac.transport.idalib_server")
276300
parser.add_argument("--database", required=True, help="Database or input path to open")
301+
parser.add_argument("--ready-fd", type=int, default=None, help=argparse.SUPPRESS)
277302
parser.add_argument(
278303
"--no-auto-analysis",
279304
dest="run_auto_analysis",
@@ -291,6 +316,7 @@ def main(argv: Optional[list[str]] = None) -> int:
291316
return serve(
292317
database_path=args.database,
293318
run_auto_analysis=bool(args.run_auto_analysis),
319+
ready_fd=args.ready_fd,
294320
)
295321
except WorkerError as exc:
296322
print(str(exc), file=sys.stderr)

0 commit comments

Comments
 (0)