Skip to content

Commit c4d843b

Browse files
d-netto authored and RAI CI (GitHub Action Automation) committed
RAI: --safe-crash-log-file flag
1 parent 7b739d7 commit c4d843b

File tree

9 files changed

+102
-10
lines changed

9 files changed

+102
-10
lines changed

base/options.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ struct JLOptions
6464
trim::Int8
6565
task_metrics::Int8
6666
timeout_for_safepoint_straggler_s::Int16
67+
safe_crash_log_file::Ptr{UInt8}
6768
end
6869

6970
# This runs early in the sysimage, when `!=` is not defined yet

src/init.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,15 @@ JL_DLLEXPORT void jl_init_(jl_image_buf_t sysimage)
757757
if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
758758
jl_install_default_signal_handlers();
759759

760+
#if (defined(_OS_LINUX_) && defined(_CPU_X86_64_)) || (defined(_OS_DARWIN_) && defined(_CPU_AARCH64_))
761+
if (jl_options.safe_crash_log_file != NULL) {
762+
jl_sig_fd = open(jl_options.safe_crash_log_file, O_WRONLY | O_CREAT | O_APPEND, 0600);
763+
if (jl_sig_fd == -1) {
764+
jl_error("fatal error: could not open safe crash log file for writing");
765+
}
766+
}
767+
#endif
768+
760769
jl_gc_init();
761770

762771
arraylist_new(&jl_linkage_blobs, 0);

src/jl_uv.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "errno.h"
1616
#include <unistd.h>
1717
#include <sys/socket.h>
18+
#include <sys/time.h>
1819
#endif
1920

2021
#include "julia.h"
@@ -812,6 +813,56 @@ JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...)
812813
return c;
813814
}
814815

816+
STATIC_INLINE void print_error_msg_as_json(char *buf) JL_NOTSAFEPOINT
817+
{
818+
// Our telemetry on SPCS expects a JSON object per line
819+
// The following lines prepare the timestamp string and the JSON object
820+
struct timeval tv;
821+
struct tm* tm_info;
822+
char timestamp_buffer[50];
823+
// Get current time
824+
gettimeofday(&tv, NULL);
825+
tm_info = gmtime(&tv.tv_sec);
826+
// Format time
827+
int offset = strftime(timestamp_buffer, 25, "%Y-%m-%dT%H:%M:%S", tm_info);
828+
// Append milliseconds
829+
snprintf(timestamp_buffer + offset, 25, ".%03d", tv.tv_usec / 1000);
830+
const char *json_preamble_p1 = "\n{\"level\":\"Error\", \"timestamp\":\"";
831+
const char *json_preamble_p2 = "\", \"message\": \"";
832+
const char *json_postamble = "\"}\n";
833+
// Ignore write failures because there is nothing we can do
834+
write(jl_sig_fd, json_preamble_p1, strlen(json_preamble_p1));
835+
write(jl_sig_fd, timestamp_buffer, strlen(timestamp_buffer));
836+
write(jl_sig_fd, json_preamble_p2, strlen(json_preamble_p2));
837+
// JSON escape the input string
838+
for(size_t i = 0; i < strlen(buf); i += 1) {
839+
switch (buf[i]) {
840+
case '"':
841+
write(jl_sig_fd, "\\\"", 2);
842+
break;
843+
case '\b':
844+
write(jl_sig_fd, "\\b", 2);
845+
break;
846+
case '\n':
847+
write(jl_sig_fd, "\\n", 2);
848+
break;
849+
case '\r':
850+
write(jl_sig_fd, "\\r", 2);
851+
break;
852+
case '\t':
853+
write(jl_sig_fd, "\\t", 2);
854+
break;
855+
case '\\':
856+
write(jl_sig_fd, "\\\\", 2);
857+
break;
858+
default:
859+
write(jl_sig_fd, buf + i, 1);
860+
}
861+
}
862+
write(jl_sig_fd, json_postamble, strlen(json_postamble));
863+
fdatasync(jl_sig_fd);
864+
}
865+
815866
JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
816867
{
817868
static char buf[1000];
@@ -828,6 +879,9 @@ JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
828879
va_end(args);
829880

830881
buf[999] = '\0';
882+
if (jl_inside_signal_handler() && jl_sig_fd != 0) {
883+
print_error_msg_as_json(buf);
884+
}
831885
if (write(STDERR_FILENO, buf, strlen(buf)) < 0) {
832886
// nothing we can do; ignore the failure
833887
}

src/jloptions.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ JL_DLLEXPORT void jl_init_options(void)
155155
JL_TRIM_NO, // trim
156156
0, // task_metrics
157157
-1, // timeout_for_safepoint_straggler_s
158+
NULL, // safe_crash_log_file
158159
};
159160
jl_options_initialized = 1;
160161
}
@@ -384,6 +385,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
384385
opt_permalloc_pkgimg,
385386
opt_trim,
386387
opt_experimental_features,
388+
opt_safe_crash_log_file,
387389
};
388390
static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:";
389391
static const struct option longopts[] = {
@@ -452,6 +454,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
452454
{ "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg },
453455
{ "heap-size-hint", required_argument, 0, opt_heap_size_hint },
454456
{ "trim", optional_argument, 0, opt_trim },
457+
{ "safe-crash-log-file", required_argument, 0, opt_safe_crash_log_file },
455458
{ 0, 0, 0, 0 }
456459
};
457460

@@ -1008,6 +1011,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
10081011
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON;
10091012
else
10101013
jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
1014+
case opt_safe_crash_log_file:
1015+
jl_options.safe_crash_log_file = strdup(optarg);
1016+
if (jl_options.safe_crash_log_file == NULL)
1017+
jl_error("julia: failed to allocate memory for --safe-crash-log-file");
10111018
break;
10121019
default:
10131020
jl_errorf("julia: unhandled option -- %c\n"

src/jloptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ typedef struct {
6868
int8_t trim;
6969
int8_t task_metrics;
7070
int16_t timeout_for_safepoint_straggler_s;
71+
const char *safe_crash_log_file;
7172
} jl_options_t;
7273

7374
#endif

src/julia_internal.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,32 @@ JL_CALLABLE(jl_f_opaque_closure_call);
785785
void jl_install_default_signal_handlers(void);
786786
void restore_signals(void);
787787
void jl_install_thread_signal_handler(jl_ptls_t ptls);
788+
extern const size_t sig_stack_size;
789+
// Report whether `ptr` lies on the signal stack of `ptls`.
// Returns nonzero when `ptls` has no signal stack recorded, or when `ptr` is
// within [signal_stack - one guard page, signal_stack + stack size].
STATIC_INLINE int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
{
    // No recorded signal stack: answer "yes", as callers of this predicate in
    // the signal code expect, and avoid doing pointer arithmetic on NULL.
    if (ptls->signal_stack == NULL)
        return 1;
    // A thread may carry its own signal stack size; zero means the default.
    size_t stack_size = ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size;
    // One guard page for signal_stack.
    char *lowest = (char*)ptls->signal_stack - jl_page_size;
    char *highest = (char*)ptls->signal_stack + stack_size;
    return (char*)ptr >= lowest && (char*)ptr <= highest;
}
// Report whether the calling code is currently running on its thread's signal
// stack, i.e. inside a signal handler. Only implemented for Linux/x86_64 and
// Darwin/AArch64; always returns 0 elsewhere.
STATIC_INLINE int jl_inside_signal_handler(void)
{
#if (defined(_OS_LINUX_) && defined(_CPU_X86_64_)) || (defined(_OS_DARWIN_) && defined(_CPU_AARCH64_))
    // A thread with no current task has no ptls to inspect.
    jl_task_t *ct = jl_get_current_task();
    if (ct == NULL)
        return 0;
    jl_ptls_t ptls = ct->ptls;
    // Without a recorded signal stack we cannot tell; is_addr_on_sigstack
    // would answer "yes" here, so decide explicitly that we are not inside one.
    if (ptls == NULL || ptls->signal_stack == NULL)
        return 0;
    // Read the stack pointer
    uintptr_t sp;
#if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
    __asm__ __volatile__("movq %%rsp, %0" : "=r"(sp));
#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
    __asm__ __volatile__("mov %0, sp" : "=r"(sp));
#endif
    // Check if the stack pointer is within the signal stack
    return is_addr_on_sigstack(ptls, (void*)sp);
#else
    return 0;
#endif
}
812+
// File-descriptor for safe logging on signal handling
813+
extern int jl_sig_fd;
788814

789815
JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_datatype_t *dt);
790816

src/signal-handling.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ static const uint64_t GIGA = 1000000000ULL;
3030
// Timers to take samples at intervals
3131
JL_DLLEXPORT void jl_profile_stop_timer(void);
3232
JL_DLLEXPORT int jl_profile_start_timer(uint8_t);
33+
// File-descriptor for safe logging on signal handling
34+
int jl_sig_fd;
3335

3436
///////////////////////
3537
// Utility functions //

src/signals-unix.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
#endif
4343

4444
// 8M signal stack, same as default stack size (though we barely use this)
45-
static const size_t sig_stack_size = 8 * 1024 * 1024;
45+
const size_t sig_stack_size = 8 * 1024 * 1024;
4646

4747
#include "julia_assert.h"
4848

@@ -102,14 +102,6 @@ static inline uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
102102
#endif
103103
}
104104

105-
// Check whether `ptr` falls on the signal stack belonging to `ptls`.
// Returns nonzero when `ptls->signal_stack` is NULL, or when `ptr` lies between
// one page below `signal_stack` and the end of the stack (both inclusive).
static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) JL_NOTSAFEPOINT
106-
{
107-
// One guard page for signal_stack.
// The upper bound uses the per-thread signal_stack_size when it is nonzero,
// and the file-level sig_stack_size otherwise.
108-
return ptls->signal_stack == NULL ||
109-
((char*)ptr >= (char*)ptls->signal_stack - jl_page_size &&
110-
(char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size));
111-
}
112-
113105
// Modify signal context `_ctx` so that `fptr` will execute when the signal returns
114106
// The function `fptr` itself must not return.
115107
JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx)

src/signals-win.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// Note that this file is `#include`d by "signal-handling.c"
55
#include <mmsystem.h> // hidden by LEAN_AND_MEAN
66

7-
static const size_t sig_stack_size = 131072; // 128k reserved for backtrace_fiber for stack overflow handling
7+
const size_t sig_stack_size = 131072; // 128k reserved for backtrace_fiber for stack overflow handling
88

99
// Copied from MINGW_FLOAT_H which may not be found due to a collision with the builtin gcc float.h
1010
// eventually we can probably integrate this into OpenLibm.

0 commit comments

Comments
 (0)