Skip to content

Commit 9895a5c

Browse files
jpnurmi and supervacuus authored
fix: AOT interop with managed .NET runtimes (#1392)
Co-authored-by: Mischan Toosarani-Hausberger <[email protected]>
1 parent 853bf2d commit 9895a5c

File tree

4 files changed

+238
-32
lines changed

4 files changed

+238
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Add logs flush on crash. This is not available for macOS with the `crashpad` backend. ([#1404](https://github.com/getsentry/sentry-native/pull/1404))
1212
- Make narrow UTF-8 the canonical path encoding on Windows. ([#1413](https://github.com/getsentry/sentry-native/pull/1413))
1313
- Re-add setting thread name for Windows transport. ([#1424](https://github.com/getsentry/sentry-native/pull/1424))
14+
- Fix AOT interop with managed .NET runtimes. ([#1392](https://github.com/getsentry/sentry-native/pull/1392))
1415

1516
**Internal**:
1617

src/backends/sentry_backend_inproc.c

Lines changed: 92 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -456,9 +456,48 @@ registers_from_uctx(const sentry_ucontext_t *uctx)
456456
return registers;
457457
}
458458

459+
#ifdef SENTRY_PLATFORM_LINUX
460+
static uintptr_t
461+
get_stack_pointer(const sentry_ucontext_t *uctx)
462+
{
463+
# if defined(__i386__)
464+
return uctx->user_context->uc_mcontext.gregs[REG_ESP];
465+
# elif defined(__x86_64__)
466+
return uctx->user_context->uc_mcontext.gregs[REG_RSP];
467+
# elif defined(__arm__)
468+
return uctx->user_context->uc_mcontext.arm_sp;
469+
# elif defined(__aarch64__)
470+
return uctx->user_context->uc_mcontext.sp;
471+
# else
472+
SENTRY_WARN("get_stack_pointer is not implemented for this architecture. "
473+
"Signal chaining may not work as expected.");
474+
return NULL;
475+
# endif
476+
}
477+
478+
static uintptr_t
479+
get_instruction_pointer(const sentry_ucontext_t *uctx)
480+
{
481+
# if defined(__i386__)
482+
return uctx->user_context->uc_mcontext.gregs[REG_EIP];
483+
# elif defined(__x86_64__)
484+
return uctx->user_context->uc_mcontext.gregs[REG_RIP];
485+
# elif defined(__arm__)
486+
return uctx->user_context->uc_mcontext.arm_pc;
487+
# elif defined(__aarch64__)
488+
return uctx->user_context->uc_mcontext.pc;
489+
# else
490+
SENTRY_WARN(
491+
"get_instruction_pointer is not implemented for this architecture. "
492+
"Signal chaining may not work as expected.");
493+
return NULL;
494+
# endif
495+
}
496+
#endif
497+
459498
static sentry_value_t
460-
make_signal_event(
461-
const struct signal_slot *sig_slot, const sentry_ucontext_t *uctx)
499+
make_signal_event(const struct signal_slot *sig_slot,
500+
const sentry_ucontext_t *uctx, sentry_handler_strategy_t strategy)
462501
{
463502
sentry_value_t event = sentry_value_new_event();
464503
sentry_value_set_by_key(
@@ -496,8 +535,10 @@ make_signal_event(
496535
"captured backtrace from ucontext with %lu frames", frame_count);
497536
// if unwinding from a ucontext didn't yield any results, try again with a
498537
// direct unwind. this is most likely the case when using `libbacktrace`,
499-
// since that does not allow to unwind from a ucontext at all.
500-
if (!frame_count) {
538+
// since that does not allow to unwind from a ucontext at all. the fallback
539+
// is skipped with the "chain at start" strategy because `libbacktrace`
540+
// crashes, and would likely not provide helpful information anyway.
541+
if (!frame_count && strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
501542
frame_count = sentry_unwind_stack(NULL, &backtrace[0], MAX_FRAMES);
502543
}
503544
SENTRY_DEBUGF("captured backtrace with %lu frames", frame_count);
@@ -534,20 +575,7 @@ handle_ucontext(const sentry_ucontext_t *uctx)
534575

535576
SENTRY_INFO("entering signal handler");
536577

537-
const struct signal_slot *sig_slot = NULL;
538-
for (int i = 0; i < SIGNAL_COUNT; ++i) {
539-
#ifdef SENTRY_PLATFORM_UNIX
540-
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
541-
#elif defined SENTRY_PLATFORM_WINDOWS
542-
if (SIGNAL_DEFINITIONS[i].signum
543-
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
544-
#else
545-
# error Unsupported platform
546-
#endif
547-
sig_slot = &SIGNAL_DEFINITIONS[i];
548-
}
549-
}
550-
578+
sentry_handler_strategy_t strategy = SENTRY_HANDLER_STRATEGY_DEFAULT;
551579
#ifdef SENTRY_PLATFORM_UNIX
552580
// inform the sentry_sync system that we're in a signal handler. This will
553581
// make mutexes spin on a spinlock instead as it's no longer safe to use a
@@ -556,42 +584,77 @@ handle_ucontext(const sentry_ucontext_t *uctx)
556584
#endif
557585

558586
SENTRY_WITH_OPTIONS (options) {
559-
// Flush logs in a crash-safe manner before crash handling
560-
if (options->enable_logs) {
561-
sentry__logs_flush_crash_safe();
562-
}
563587
#ifdef SENTRY_PLATFORM_LINUX
564588
// On Linux (and thus Android) CLR/Mono converts signals provoked by
565589
// AOT/JIT-generated native code into managed code exceptions. In these
566590
// cases, we shouldn't react to the signal at all and let their handler
567591
// discontinue the signal chain by invoking the runtime handler before
568592
// we process the signal.
569-
if (sentry_options_get_handler_strategy(options)
570-
== SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
593+
strategy = sentry_options_get_handler_strategy(options);
594+
if (strategy == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
571595
SENTRY_DEBUG("defer to runtime signal handler at start");
572596
// there is a good chance that we won't return from the previous
573597
// handler and that would mean we couldn't enter this handler with
574598
// the next signal coming in if we didn't "leave" here.
575599
sentry__leave_signal_handler();
600+
if (!options->enable_logging_when_crashed) {
601+
sentry__logger_enable();
602+
}
603+
604+
uintptr_t ip = get_instruction_pointer(uctx);
605+
uintptr_t sp = get_stack_pointer(uctx);
576606

577607
// invoke the previous handler (typically the CLR/Mono
578608
// signal-to-managed-exception handler)
579609
invoke_signal_handler(
580610
uctx->signum, uctx->siginfo, (void *)uctx->user_context);
581611

612+
// If the execution returns here in AOT mode, and the instruction
613+
// or stack pointer were changed, it means CLR/Mono converted the
614+
// signal into a managed exception and transferred execution to a
615+
// managed exception handler.
616+
// https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
617+
if (ip != get_instruction_pointer(uctx)
618+
|| sp != get_stack_pointer(uctx)) {
619+
SENTRY_DEBUG("runtime converted the signal to a managed "
620+
"exception, we do not handle the signal");
621+
return;
622+
}
623+
582624
// let's re-enter because it means this was an actual native crash
625+
if (!options->enable_logging_when_crashed) {
626+
sentry__logger_disable();
627+
}
583628
sentry__enter_signal_handler();
584629
SENTRY_DEBUG(
585630
"return from runtime signal handler, we handle the signal");
586631
}
587632
#endif
588633

634+
const struct signal_slot *sig_slot = NULL;
635+
for (int i = 0; i < SIGNAL_COUNT; ++i) {
636+
#ifdef SENTRY_PLATFORM_UNIX
637+
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
638+
#elif defined SENTRY_PLATFORM_WINDOWS
639+
if (SIGNAL_DEFINITIONS[i].signum
640+
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
641+
#else
642+
# error Unsupported platform
643+
#endif
644+
sig_slot = &SIGNAL_DEFINITIONS[i];
645+
}
646+
}
647+
589648
#ifdef SENTRY_PLATFORM_UNIX
590649
// use a signal-safe allocator before we tear down.
591650
sentry__page_allocator_enable();
592651
#endif
652+
// Flush logs in a crash-safe manner before crash handling
653+
if (options->enable_logs) {
654+
sentry__logs_flush_crash_safe();
655+
}
593656

594-
sentry_value_t event = make_signal_event(sig_slot, uctx);
657+
sentry_value_t event = make_signal_event(sig_slot, uctx, strategy);
595658
bool should_handle = true;
596659
sentry__write_crash_marker(options);
597660

@@ -647,8 +710,10 @@ handle_ucontext(const sentry_ucontext_t *uctx)
647710
// forward as we're not restoring the page allocator.
648711
reset_signal_handlers();
649712
sentry__leave_signal_handler();
650-
invoke_signal_handler(
651-
uctx->signum, uctx->siginfo, (void *)uctx->user_context);
713+
if (strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) {
714+
invoke_signal_handler(
715+
uctx->signum, uctx->siginfo, (void *)uctx->user_context);
716+
}
652717
#endif
653718
}
654719

tests/fixtures/dotnet_signal/Program.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static void Main(string[] args)
4545
{
4646
native_crash();
4747
}
48-
else
48+
else if (args.Contains("managed-exception"))
4949
{
5050
try
5151
{
@@ -55,10 +55,13 @@ static void Main(string[] args)
5555
}
5656
catch (NullReferenceException exception)
5757
{
58-
Console.WriteLine("dereference another NULL object from managed code");
59-
var s = default(string);
60-
var c = s.Length;
6158
}
6259
}
60+
else if (args.Contains("unhandled-managed-exception"))
61+
{
62+
Console.WriteLine("dereference a NULL object from managed code (unhandled)");
63+
var s = default(string);
64+
var c = s.Length;
65+
}
6366
}
6467
}

tests/test_dotnet_signals.py

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ def run_dotnet(tmp_path, args):
4949

5050

5151
def run_dotnet_managed_exception(tmp_path):
52-
return run_dotnet(tmp_path, ["dotnet", "run"])
52+
return run_dotnet(tmp_path, ["dotnet", "run", "managed-exception"])
53+
54+
55+
def run_dotnet_unhandled_managed_exception(tmp_path):
    """Start `dotnet run` with the `unhandled-managed-exception` argument.

    Returns whatever `run_dotnet` returns for that command line.
    """
    command = ["dotnet", "run", "unhandled-managed-exception"]
    return run_dotnet(tmp_path, command)
5357

5458

5559
def run_dotnet_native_crash(tmp_path):
@@ -84,9 +88,25 @@ def test_dotnet_signals_inproc(cmake):
8488
)
8589

8690
# this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR converts to an exception.
91+
# raising a signal that CLR converts to a managed exception, which is then handled by the managed code and
92+
# not leaked out to the native code so no crash is registered.
8793
dotnet_run = run_dotnet_managed_exception(tmp_path)
8894
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
8995

96+
# the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
97+
assert dotnet_run.returncode == 0
98+
assert not (
99+
"NullReferenceException" in dotnet_run_stderr
100+
), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
101+
database_path = project_fixture_path / ".sentry-native"
102+
assert database_path.exists(), "No database-path exists"
103+
assert not (database_path / "last_crash").exists(), "A crash was registered"
104+
assert_empty_run_dir(database_path)
105+
106+
# this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR converts to an exception.
107+
dotnet_run = run_dotnet_unhandled_managed_exception(tmp_path)
108+
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
109+
90110
# the program will fail with a `NullReferenceException`, but the Native SDK won't register a crash.
91111
assert dotnet_run.returncode != 0
92112
assert (
@@ -112,3 +132,120 @@ def test_dotnet_signals_inproc(cmake):
112132
shutil.rmtree(project_fixture_path / ".sentry-native", ignore_errors=True)
113133
shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
114134
shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)
135+
136+
137+
def run_aot(tmp_path, args=None):
    """Launch the AOT-published `bin/test_dotnet` binary found under `tmp_path`.

    The process runs with `tmp_path` as its working directory and with
    `tmp_path` prepended to `LD_LIBRARY_PATH`.

    :param tmp_path: directory that contains `bin/test_dotnet`.
    :param args: optional sequence of command-line arguments for the binary.
    :return: the started `subprocess.Popen`, with text-mode pipes on stdout
        and stderr.
    """
    env = os.environ.copy()
    # Only append the inherited value when there is one: a trailing ":" would
    # add an empty entry, which the dynamic linker reads as "current directory".
    search_path = [str(tmp_path)]
    inherited = env.get("LD_LIBRARY_PATH")
    if inherited:
        search_path.append(inherited)
    env["LD_LIBRARY_PATH"] = ":".join(search_path)

    command = [str(tmp_path / "bin/test_dotnet"), *(args or [])]
    return subprocess.Popen(
        command,
        cwd=tmp_path,
        env=env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
150+
151+
152+
def run_aot_managed_exception(tmp_path):
    """Start the AOT binary with the `managed-exception` argument via `run_aot`."""
    scenario = ["managed-exception"]
    return run_aot(tmp_path, scenario)
154+
155+
156+
def run_aot_unhandled_managed_exception(tmp_path):
    """Start the AOT binary with the `unhandled-managed-exception` argument via `run_aot`."""
    scenario = ["unhandled-managed-exception"]
    return run_aot(tmp_path, scenario)
158+
159+
160+
def run_aot_native_crash(tmp_path):
    """Start the AOT binary with the `native-crash` argument via `run_aot`."""
    scenario = ["native-crash"]
    return run_aot(tmp_path, scenario)
162+
163+
164+
@pytest.mark.skipif(
    sys.platform != "linux" or is_x86 or is_asan or is_tsan,
    reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers",
)
def test_aot_signals_inproc(cmake):
    """Run the AOT-compiled dotnet fixture against the inproc backend.

    Three scenarios are executed in order: a handled managed exception (no
    crash may be registered), an unhandled managed exception (a crash is
    registered) and a native crash (a crash is registered and reported on
    stderr).
    """
    # `tmp_path` only exists once the cmake build succeeded. Start with None so
    # the cleanup in `finally` cannot raise a NameError that would hide the
    # actual build failure.
    tmp_path = None
    try:
        # build native client library with inproc and the example for crash dumping
        tmp_path = cmake(
            ["sentry"],
            {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"},
        )

        # build the crashing native library
        subprocess.run(
            [
                "gcc",
                "-Wall",
                "-Wextra",
                "-fPIC",
                "-shared",
                str(project_fixture_path / "crash.c"),
                "-o",
                str(tmp_path / "libcrash.so"),
            ],
            check=True,
        )

        # AOT-compile the dotnet program
        subprocess.run(
            [
                "dotnet",
                "publish",
                "-p:PublishAot=true",
                "-p:Configuration=Release",
                "-o",
                str(tmp_path / "bin"),
            ],
            cwd=project_fixture_path,
            check=True,
        )

        # this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException`
        # raising a signal that CLR converts to a managed exception, which is then handled by the managed code and
        # not leaked out to the native code so no crash is registered.
        dotnet_run = run_aot_managed_exception(tmp_path)
        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

        # the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
        assert dotnet_run.returncode == 0
        assert not (
            "NullReferenceException" in dotnet_run_stderr
        ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
        database_path = tmp_path / ".sentry-native"
        assert database_path.exists(), "No database-path exists"
        assert not (database_path / "last_crash").exists(), "A crash was registered"
        assert_empty_run_dir(database_path)

        # this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException`
        # raising a signal that CLR converts to a managed exception, which is then not handled by the managed code but
        # leaked out to the native code so a crash is registered.
        dotnet_run = run_aot_unhandled_managed_exception(tmp_path)
        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

        # the program will fail with a `NullReferenceException`, so the Native SDK will register a crash.
        assert dotnet_run.returncode != 0
        assert (
            "NullReferenceException" in dotnet_run_stderr
        ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
        database_path = tmp_path / ".sentry-native"
        assert database_path.exists(), "No database-path exists"
        assert (database_path / "last_crash").exists()
        assert_run_dir_with_envelope(database_path)

        # this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal
        # NOTE(review): the database still holds the crash from the previous
        # scenario, so the two file-based assertions below may pass regardless
        # of this run; the stderr check is the scenario-specific one. Confirm.
        dotnet_run = run_aot_native_crash(tmp_path)
        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

        # the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
        assert dotnet_run.returncode != 0
        assert (
            "crash has been captured" in dotnet_run_stderr
        ), f"Native exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
        assert (database_path / "last_crash").exists()
        assert_run_dir_with_envelope(database_path)
    finally:
        if tmp_path is not None:
            shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True)
        shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
        shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)

0 commit comments

Comments
 (0)