Skip to content

Commit 5b22bde

Browse files
committed
py/persistentcode: Explicitly track native BSS/rodata when needed.
Signed-off-by: Damien George <[email protected]>
1 parent f4ab9d9 commit 5b22bde

File tree

7 files changed

+144
-52
lines changed

7 files changed

+144
-52
lines changed

py/mpconfig.h

+40-18
Original file line numberDiff line numberDiff line change
@@ -425,18 +425,6 @@
425425
// Convenience definition for whether any native or inline assembler emitter is enabled
426426
#define MICROPY_EMIT_MACHINE_CODE (MICROPY_EMIT_NATIVE || MICROPY_EMIT_INLINE_ASM)
427427

428-
// Whether native relocatable code loaded from .mpy files is explicitly tracked
429-
// so that the GC cannot reclaim it. Needed on architectures that allocate
430-
// executable memory on the MicroPython heap and don't explicitly track this
431-
// data some other way.
432-
#ifndef MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
433-
#if !MICROPY_EMIT_MACHINE_CODE || defined(MP_PLAT_ALLOC_EXEC) || defined(MP_PLAT_COMMIT_EXEC)
434-
#define MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE (0)
435-
#else
436-
#define MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE (1)
437-
#endif
438-
#endif
439-
440428
/*****************************************************************************/
441429
/* Compiler configuration */
442430

@@ -1992,14 +1980,48 @@ typedef double mp_float_t;
19921980
#define MICROPY_MAKE_POINTER_CALLABLE(p) (p)
19931981
#endif
19941982

1995-
// If these MP_PLAT_*_EXEC macros are overridden then the memory allocated by them
1996-
// must be somehow reachable for marking by the GC, since the native code
1997-
// generators store pointers to GC managed memory in the code.
1983+
// Whether native text/BSS/rodata memory loaded from .mpy files is explicitly tracked
1984+
// so that the GC cannot reclaim it.
1985+
//
1986+
// In general a port should let these options have their defaults, but the defaults here
1987+
// can be overridden if needed by defining both MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
1988+
// and MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA.
1989+
#ifndef MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
1990+
#if MICROPY_EMIT_MACHINE_CODE && MICROPY_PERSISTENT_CODE_LOAD
1991+
// Pointer tracking is required when loading native code is enabled.
1992+
#if defined(MP_PLAT_ALLOC_EXEC) || defined(MP_PLAT_COMMIT_EXEC)
1993+
// If a port defines a custom allocator or commit function for native text, then the
1994+
// text does not need to be tracked (its allocation is managed by the port). But the
1995+
// BSS/rodata must be tracked (if there is any) because if there are any pointers to it
1996+
// in the function data, they aren't traced by the GC.
1997+
#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (0)
1998+
#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (1)
1999+
#else
2000+
// If a port uses the default allocator (the GC heap) then all native text is allocated
2001+
// on the GC heap. But it's not guaranteed that a pointer to the head of the block of
2002+
// native text (which may contain multiple native functions) will be retained for the GC
2003+
// to trace. This is because native functions can start inside the big block of text
2004+
// and so it's possible that the only GC-reachable pointers point inside the block.
2005+
// Therefore the big block is explicitly tracked. If there is any BSS/rodata memory,
2006+
// then it does not need to be explicitly tracked because a pointer to it is stored into
2007+
// the function text via `mp_native_relocate()`.
2008+
#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (1)
2009+
#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (0)
2010+
#endif
2011+
#else // MICROPY_EMIT_MACHINE_CODE && MICROPY_PERSISTENT_CODE_LOAD
2012+
// Pointer tracking not needed when loading native code is disabled.
2013+
#define MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA (0)
2014+
#define MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA (0)
2015+
#endif
2016+
#endif
2017+
2018+
// If these macros are defined then the memory allocated by them does not need to be
2019+
// traced by the GC. But if they are left undefined then the GC heap will be used as
2020+
// the allocator and the memory must be traced by the GC. See also above logic for
2021+
// enabling MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA and
2022+
// MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA.
19982023
#ifndef MP_PLAT_ALLOC_EXEC
19992024
#define MP_PLAT_ALLOC_EXEC(min_size, ptr, size) do { *ptr = m_new(byte, min_size); *size = min_size; } while (0)
2000-
#endif
2001-
2002-
#ifndef MP_PLAT_FREE_EXEC
20032025
#define MP_PLAT_FREE_EXEC(ptr, size) m_del(byte, ptr, size)
20042026
#endif
20052027

py/persistentcode.c

+21-20
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ typedef struct _bytecode_prelude_t {
7272
static int read_byte(mp_reader_t *reader);
7373
static size_t read_uint(mp_reader_t *reader);
7474

75+
#if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
76+
77+
// An mp_obj_list_t that tracks native text/BSS/rodata to prevent the GC from reclaiming them.
78+
MP_REGISTER_ROOT_POINTER(mp_obj_t persistent_code_root_pointers);
79+
80+
// Append `ptr` to the persistent_code_root_pointers list so that it stays
// reachable from a registered root pointer. The list itself is created on
// first use, i.e. while the root pointer still holds MP_OBJ_NULL.
static void track_root_pointer(void *ptr) {
81+
if (MP_STATE_PORT(persistent_code_root_pointers) == MP_OBJ_NULL) {
82+
MP_STATE_PORT(persistent_code_root_pointers) = mp_obj_new_list(0, NULL);
83+
}
84+
mp_obj_list_append(MP_STATE_PORT(persistent_code_root_pointers), MP_OBJ_FROM_PTR(ptr));
85+
}
86+
87+
#endif
88+
7589
#if MICROPY_EMIT_MACHINE_CODE
7690

7791
typedef struct _reloc_info_t {
@@ -299,11 +313,10 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co
299313
read_bytes(reader, rodata, rodata_size);
300314
}
301315

302-
// Viper code with BSS/rodata should not have any children.
303-
// Reuse the children pointer to reference the BSS/rodata
304-
// memory so that it is not reclaimed by the GC.
305-
assert(!has_children);
306-
children = (void *)data;
316+
#if MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
317+
// Track the BSS/rodata memory so it's not reclaimed by the GC.
318+
track_root_pointer(data);
319+
#endif
307320
}
308321
}
309322
#endif
@@ -351,16 +364,9 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co
351364
fun_data = MP_PLAT_COMMIT_EXEC(fun_data, fun_data_len, opt_ri);
352365
#else
353366
if (native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC) {
354-
#if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
355-
// If native code needs relocations then it's not guaranteed that a pointer to
356-
// the head of `buf` (containing the machine code) will be retained for the GC
357-
// to trace. This is because native functions can start inside `buf` and so
358-
// it's possible that the only GC-reachable pointers are pointers inside `buf`.
359-
// So put this `buf` on a list of reachable root pointers.
360-
if (MP_STATE_PORT(track_reloc_code_list) == MP_OBJ_NULL) {
361-
MP_STATE_PORT(track_reloc_code_list) = mp_obj_new_list(0, NULL);
362-
}
363-
mp_obj_list_append(MP_STATE_PORT(track_reloc_code_list), MP_OBJ_FROM_PTR(fun_data));
367+
#if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA
368+
// Track the function data memory so it's not reclaimed by the GC.
369+
track_root_pointer(fun_data);
364370
#endif
365371
// Do the relocations.
366372
mp_native_relocate(&ri, fun_data, (uintptr_t)fun_data);
@@ -662,8 +668,3 @@ void mp_raw_code_save_file(mp_compiled_module_t *cm, qstr filename) {
662668
#endif // MICROPY_PERSISTENT_CODE_SAVE_FILE
663669

664670
#endif // MICROPY_PERSISTENT_CODE_SAVE
665-
666-
#if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
667-
// An mp_obj_list_t that tracks relocated native code to prevent the GC from reclaiming them.
668-
MP_REGISTER_ROOT_POINTER(mp_obj_t track_reloc_code_list);
669-
#endif

py/runtime.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ void mp_init(void) {
119119
MP_STATE_VM(mp_module_builtins_override_dict) = NULL;
120120
#endif
121121

122-
#if MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE
123-
MP_STATE_VM(track_reloc_code_list) = MP_OBJ_NULL;
122+
#if MICROPY_PERSISTENT_CODE_TRACK_FUN_DATA || MICROPY_PERSISTENT_CODE_TRACK_BSS_RODATA
123+
MP_STATE_VM(persistent_code_root_pointers) = MP_OBJ_NULL;
124124
#endif
125125

126126
#if MICROPY_PY_OS_DUPTERM

tests/micropython/import_mpy_native_gc.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Test that native code loaded from a .mpy file is retained after a GC.
1+
# Test that native text/BSS/rodata loaded from a .mpy file is retained after a GC.
22

33
try:
44
import gc, sys, io, vfs
@@ -44,17 +44,20 @@ def open(self, path, mode):
4444
return UserFile(self.files[path])
4545

4646

47-
# Pre-compiled examples/natmod/features0 example for various architectures, keyed
47+
# Pre-compiled import_mpy_native_gc_module example for various architectures, keyed
4848
# by the required value of sys.implementation._mpy (without sub-version).
49-
# cd examples/natmod/features0
50-
# make clean
51-
# make ARCH=x64 # or ARCH=armv6m
52-
# cat features0.mpy | python -c 'import sys; print(sys.stdin.buffer.read())'
49+
# To rebuild:
50+
# $ cd import_mpy_native_gc_module
51+
# $ make clean
52+
# $ make ARCH=x64 # or ARCH=armv6m or ARCH=xtensawin
53+
# Then copy the bytes object printed on the last line.
5354
features0_file_contents = {
5455
# -march=x64
55-
0x806: b'M\x06\x0b\x1f\x02\x004build/features0.native.mpy\x00\x12factorial\x00\x8a\x02\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb7x\x02\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x11$\r&\xaf \x01"\xff',
56+
0x806: b"M\x06\x0b\x1f\x03\x002build/test_x64.native.mpy\x00\x08add1\x00\x0cunused\x00\x91B\xe9I\x00\x00\x00H\x8b\x05\xf4\x00\x00\x00H\x8b\x00\xc3H\x8b\x05\xf9\x00\x00\x00\xbe\x02\x00\x00\x00\x8b8H\x8b\x05\xdb\x00\x00\x00H\x8b@ \xff\xe0H\x8b\x05\xce\x00\x00\x00S\xbe\x02\x00\x00\x00H\x8bX \xffP\x18\xbe\x02\x00\x00\x00H\x8dx\x01H\x89\xd8[\xff\xe0AVAUATUSH\x8b\x1d\xa3\x00\x00\x00H\x8bG\x08L\x8bk(H\x8bx\x08A\xff\xd5L\x8b5\x95\x00\x00\x00L\x8bchH\x8d5r\x00\x00\x00H\x89\xc5H\x8b\x05\x88\x00\x00\x00A\x0f\xb7~\x04\xc7\x00@\xe2\x01\x00A\xff\xd4H\x8d5C\x00\x00\x00\xbfV\x00\x00\x00A\xff\xd4A\x0f\xb7~\x02H\x8d5\x1f\x00\x00\x00A\xff\xd4H\x89\xefA\xff\xd5H\x8b\x03[]A\\A]A^\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x04\x11@\rB\tD\xaf4\x016\xad8\x01:\xaf<\x01>\xff",
5657
# -march=armv6m
57-
0x1006: b"M\x06\x13\x1f\x02\x004build/features0.native.mpy\x00\x12factorial\x00\x88\x02\x18\xe0\x00\x00\x10\xb5\tK\tJ{D\x9cX\x02!\xe3h\x98G\x03\x00\x01 \x00+\x02\xd0XC\x01;\xfa\xe7\x02!#i\x98G\x10\xbd\xc0Fj\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05\x00\x07K\x08I\xf3XyDX\x88ck\x98G(\x00\xb8G h\xf8\xbd\xc0F:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x11<\r>\xaf8\x01:\xff",
58+
0x1006: b"M\x06\x13\x1f\x03\x008build/test_armv6m.native.mpy\x00\x08add1\x00\x0cunused\x00\x8eb0\xe0\x00\x00\x00\x00\x00\x00\x02K\x03J{D\x9bX\x18hpG\xd0\x00\x00\x00\x00\x00\x00\x00\x10\xb5\x05K\x05I\x06J{D\x9aX[X\x10h\x02!\x1bi\x98G\x10\xbd\xb8\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x10\xb5\x06K\x06J{D\x9bX\x02!\x1ci\xdbh\x98G\x02!\x010\xa0G\x10\xbd\xc0F\x96\x00\x00\x00\x00\x00\x00\x00\xf7\xb5\x12O\x12K\x7fD\xfdX\x12Lki|D\x00\x93ChXh\x00\x9b\x98G\x0fK\x01\x90\x0fJ\xfbXnk\x1a`\x0eK!\x00\xffX\xb8\x88\xb0G!\x00V \x081\xb0G!\x00x\x88\x101\xb0G\x01\x98\x00\x9b\x98G(h\xfe\xbd\xc0Fr\x00\x00\x00\x00\x00\x00\x00R\x00\x00\x00\x08\x00\x00\x00@\xe2\x01\x00\x04\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\x1d\x00\x00\x00\x00\x00\x00\x00A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00P\x04\x11p\rr\tt\xafd\x01f\xadh\x01j\xafl\x01n\xff",
59+
# -march=xtensawin
60+
0x2806: b"M\x06+\x1f\x03\x00>build/test_xtensawin.native.mpy\x00\x08add1\x00\x0cunused\x00\x8a\x12\x06\x16\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x006A\x00\x81\xf9\xff(\x08\x1d\xf0\x00\x006A\x00\x91\xfb\xff\x81\xf5\xff\xa8\t\x88H\x0c+\xe0\x08\x00-\n\x1d\xf0\x00\x006A\x00\x81\xf0\xff\xad\x02xH\x888\x0c+\xe0\x08\x00\x0c+\x1b\xaa\xe0\x07\x00-\n\x1d\xf06A\x00a\xe9\xff\x88\x122&\x05\xa2(\x01\xe0\x03\x00q\xe6\xff\x81\xea\xff\x92\xa7\x89\xa0\x99\x11H\xd6]\n\xb1\xe3\xff\xa2\x17\x02\x99\x08\xe0\x04\x00\xb1\xe2\xff\\j\xe0\x04\x00\xb1\xe1\xff\xa2\x17\x01\xe0\x04\x00\xad\x05\xe0\x03\x00(\x06\x1d\xf0p\x18\x04\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00(\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x11\x02\r\x04\x07\x06\x03\t\x0c\xaf\x01\x01\x03\xad\x05\x01\x07\xaf\t\x01\x0b\xff",
5861
}
5962

6063
# Populate armv7m-derived archs based on armv6m.
@@ -76,20 +79,34 @@ def open(self, path, mode):
7679

7780
# Import the native functions.
7881
gc.collect()
79-
from features0 import factorial
82+
from features0 import get, add1
83+
84+
# Test that the native functions work to begin with.
85+
print(get())
86+
print(add1(12))
8087

8188
# Free the module that contained the functions.
8289
del sys.modules["features0"]
8390

91+
92+
# Sweep the stack to remove any stray pointers to the memory that we are aiming to reclaim.
93+
# Call itself `n` more times and then return; it does no other work.
def recurse(n):
94+
if n:
95+
recurse(n - 1)
96+
97+
98+
recurse(10)
99+
84100
# Run a GC cycle which should reclaim the module but not the native functions.
85101
gc.collect()
86102

87103
# Allocate lots of fragmented memory to overwrite anything that was just freed by the GC.
88104
for i in range(1000):
89105
[]
90106

91-
# Run the native function, it should not have been freed or overwritten.
92-
print(factorial(10))
107+
# Run the native functions; their text/BSS/rodata should not have been freed or overwritten.
108+
print(get())
109+
print(add1(12))
93110

94111
# Unmount and undo path addition.
95112
vfs.umount("/userfs")
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
3628800
1+
123456
2+
13
3+
123456
4+
13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
MPY_DIR = ../../..
2+
3+
MOD = test_$(ARCH)
4+
SRC = test.c
5+
ARCH = x64
6+
7+
.PHONY: main
8+
main: all
9+
$(Q)cat $(MOD).mpy | python -c 'import sys; print(sys.stdin.buffer.read())'
10+
11+
include $(MPY_DIR)/py/dynruntime.mk
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// This test native module is used by import_mpy_native_gc.py.
2+
// It has:
3+
// - A variable in the BSS, to check that the BSS is not reclaimed by the GC.
4+
// - An unused native function at the start so that subsequent native functions
5+
// don't start at the beginning of the native function data. This tests that the
6+
// GC doesn't reclaim the native function data even when the only pointer to that
7+
// data is pointing inside the allocated memory.
8+
9+
#include "py/dynruntime.h"
10+
11+
uint32_t bss_variable;
12+
13+
// Placeholder function: ignores `x_obj` and returns None. Per the file header,
// it exists so that the functions defined after it do not start at the
// beginning of the native function data.
static mp_obj_t unused(mp_obj_t x_obj) {
14+
return mp_const_none;
15+
}
16+
static MP_DEFINE_CONST_FUN_OBJ_1(unused_obj, unused);
17+
18+
// Return the current value of `bss_variable` as a new int object. The result
// is only correct while the memory holding `bss_variable` is still intact.
static mp_obj_t get(void) {
19+
return mp_obj_new_int(bss_variable);
20+
}
21+
static MP_DEFINE_CONST_FUN_OBJ_0(get_obj, get);
22+
23+
// Convert `x_obj` to an integer, add one, and return the result as a new int object.
static mp_obj_t add1(mp_obj_t x_obj) {
24+
return mp_obj_new_int(mp_obj_get_int(x_obj) + 1);
25+
}
26+
static MP_DEFINE_CONST_FUN_OBJ_1(add1_obj, add1);
27+
28+
// Module init function: writes 123456 into `bss_variable` (the value that
// get() reports afterwards) and stores the three function objects as the
// globals `unused`, `get` and `add1`.
mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *args) {
29+
MP_DYNRUNTIME_INIT_ENTRY
30+
31+
bss_variable = 123456;
32+
33+
mp_store_global(MP_QSTR_unused, MP_OBJ_FROM_PTR(&unused_obj));
34+
mp_store_global(MP_QSTR_get, MP_OBJ_FROM_PTR(&get_obj));
35+
mp_store_global(MP_QSTR_add1, MP_OBJ_FROM_PTR(&add1_obj));
36+
37+
MP_DYNRUNTIME_INIT_EXIT
38+
}

0 commit comments

Comments
 (0)