
Commit 76d9bcd

ozbenh authored and stewartsmith committed
lock: Add additional lock auditing code
Keep track of lock owner name and replace lock_depth counter with a
per-cpu list of locks held by the cpu. This allows us to print the
actual locks held in case we hit the (in)famous message about
opal_pollers being run with a lock held. It also allows us to warn
(and drop them) if locks are still held when returning to the OS or
completing a scheduled job.

Signed-off-by: Benjamin Herrenschmidt <[email protected]>
Reviewed-by: Nicholas Piggin <[email protected]>
[stewart: fix unit tests]
Signed-off-by: Stewart Smith <[email protected]>
1 parent ca612b8 · commit 76d9bcd

21 files changed (+106 -31 lines)
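
For orientation before the per-file diffs: the commit replaces the old per-cpu lock_depth counter with a per-cpu list of held locks, and stamps each lock with the call site that took it. A minimal standalone sketch of that idea follows; it is illustrative only (plain pointers instead of the CCAN list, hand-written owner strings instead of the LOCK_CALLER macro that include/lock.h adds below), not skiboot code.

/* Sketch only: each CPU keeps a list of the locks it currently holds,
 * and each lock remembers the call site that took it, so both can be
 * dumped when something looks wrong. */
#include <assert.h>
#include <stdio.h>

struct lock {
	int lock_val;		/* 0 = free, 1 = held */
	const char *owner;	/* call site that took the lock */
	struct lock *next;	/* linkage in the per-cpu held list */
};

static struct lock *locks_held;	/* stands in for this_cpu()->locks_held */

static void lock_caller(struct lock *l, const char *owner)
{
	assert(!l->lock_val);
	l->lock_val = 1;
	l->owner = owner;
	l->next = locks_held;
	locks_held = l;
}

static void dump_locks_list(void)
{
	printf("Locks held:\n");
	for (struct lock *l = locks_held; l; l = l->next)
		printf(" %s\n", l->owner);
}

int main(void)
{
	static struct lock a, b;

	lock_caller(&a, "core/example.c:42");	/* hypothetical call sites */
	lock_caller(&b, "core/example.c:99");
	dump_locks_list();			/* prints both owners, newest first */
	return 0;
}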

core/cpu.c (+6)

@@ -285,6 +285,11 @@ void cpu_process_jobs(void)
 		if (no_return)
 			free(job);
 		func(data);
+		if (!list_empty(&cpu->locks_held)) {
+			prlog(PR_ERR, "OPAL job %s returning with locks held\n",
+			      job->name);
+			drop_my_locks(true);
+		}
 		lock(&cpu->job_lock);
 		if (!no_return) {
 			cpu->job_count--;
@@ -822,6 +827,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 	init_lock(&t->dctl_lock);
 	init_lock(&t->job_lock);
 	list_head_init(&t->job_queue);
+	list_head_init(&t->locks_held);
 	t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
 	t->state = state;
 	t->pir = pir;

core/lock.c (+32 -9)

@@ -57,8 +57,8 @@ static void unlock_check(struct lock *l)
 	if (l->in_con_path && this_cpu()->con_suspend == 0)
 		lock_error(l, "Unlock con lock with console not suspended", 3);
 
-	if (this_cpu()->lock_depth == 0)
-		lock_error(l, "Releasing lock with 0 depth", 4);
+	if (list_empty(&this_cpu()->locks_held))
+		lock_error(l, "Releasing lock we don't hold depth", 4);
 }
 
 #else
@@ -89,30 +89,31 @@ static inline bool __try_lock(struct cpu_thread *cpu, struct lock *l)
 	return false;
 }
 
-bool try_lock(struct lock *l)
+bool try_lock_caller(struct lock *l, const char *owner)
 {
 	struct cpu_thread *cpu = this_cpu();
 
 	if (bust_locks)
 		return true;
 
 	if (__try_lock(cpu, l)) {
+		l->owner = owner;
 		if (l->in_con_path)
 			cpu->con_suspend++;
-		cpu->lock_depth++;
+		list_add(&cpu->locks_held, &l->list);
 		return true;
 	}
 	return false;
 }
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *owner)
 {
 	if (bust_locks)
 		return;
 
 	lock_check(l);
 	for (;;) {
-		if (try_lock(l))
+		if (try_lock_caller(l, owner))
 			break;
 		smt_lowest();
 		while (l->lock_val)
@@ -130,8 +131,9 @@ void unlock(struct lock *l)
 
 	unlock_check(l);
 
+	l->owner = NULL;
+	list_del(&l->list);
 	lwsync();
-	this_cpu()->lock_depth--;
 	l->lock_val = 0;
 
 	/* WARNING: On fast reboot, we can be reset right at that
@@ -144,19 +146,40 @@ void unlock(struct lock *l)
 	}
 }
 
-bool lock_recursive(struct lock *l)
+bool lock_recursive_caller(struct lock *l, const char *caller)
 {
 	if (bust_locks)
 		return false;
 
 	if (lock_held_by_me(l))
 		return false;
 
-	lock(l);
+	lock_caller(l, caller);
 	return true;
 }
 
 void init_locks(void)
 {
 	bust_locks = false;
 }
+
+void dump_locks_list(void)
+{
+	struct lock *l;
+
+	prlog(PR_ERR, "Locks held:\n");
+	list_for_each(&this_cpu()->locks_held, l, list)
+		prlog(PR_ERR, " %s\n", l->owner);
+}
+
+void drop_my_locks(bool warn)
+{
+	struct lock *l;
+
+	while((l = list_pop(&this_cpu()->locks_held, struct lock, list)) != NULL) {
+		if (warn)
+			prlog(PR_ERR, " %s\n", l->owner);
+		unlock(l);
+	}
+}
+
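One detail of drop_my_locks() worth noting: try_lock_caller() records a held lock with list_add(), which inserts at the head of the per-cpu list, and list_pop() removes from the head, so leftover locks are released in reverse order of acquisition (last taken, first dropped). Below is a small sketch of that LIFO behaviour using the same CCAN list calls the diff relies on; the struct name and owner strings are made up for illustration, and it assumes the ccan/list headers from the source tree are on the include path.

#include <stdio.h>
#include <ccan/list/list.h>

struct lk {
	struct list_node list;
	const char *owner;
};

int main(void)
{
	struct list_head held;			/* stands in for cpu->locks_held */
	struct lk a = { .owner = "a.c:10" };	/* taken first */
	struct lk b = { .owner = "b.c:20" };	/* taken second */
	struct lk *l;

	list_head_init(&held);
	list_add(&held, &a.list);
	list_add(&held, &b.list);

	/* pops "b.c:20" then "a.c:10", mirroring drop_my_locks() */
	while ((l = list_pop(&held, struct lk, list)) != NULL)
		printf("releasing %s\n", l->owner);
	return 0;
}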

core/opal.c (+7 -1)

@@ -182,6 +182,11 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
 		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
 		       mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
 	} else {
+		if (!list_empty(&cpu->locks_held)) {
+			prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
+			      token, retval);
+			drop_my_locks(true);
+		}
 		sync(); /* release barrier vs quiescing */
 		cpu->in_opal_call--;
 	}
@@ -557,14 +562,15 @@ void opal_run_pollers(void)
 	}
 	this_cpu()->in_poller = true;
 
-	if (this_cpu()->lock_depth && pollers_with_lock_warnings < 64) {
+	if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) {
 		/**
 		 * @fwts-label OPALPollerWithLock
 		 * @fwts-advice opal_run_pollers() was called with a lock
 		 * held, which could lead to deadlock if not excessively
 		 * lucky/careful.
 		 */
 		prlog(PR_ERR, "Running pollers with lock held !\n");
+		dump_locks_list();
 		backtrace();
 		pollers_with_lock_warnings++;
 		if (pollers_with_lock_warnings == 64) {

core/stack.c (+1)

@@ -21,6 +21,7 @@
 #include <stack.h>
 #include <mem_region.h>
 #include <unistd.h>
+#include <lock.h>
 
 #define STACK_BUF_ENTRIES 60
 static struct bt_entry bt_buf[STACK_BUF_ENTRIES];

core/test/run-malloc-speed.c (+2 -1)

@@ -55,8 +55,9 @@ static inline void real_free(void *p)
 char __rodata_start[1], __rodata_end[1];
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }

core/test/run-malloc.c (+2 -1)

@@ -57,8 +57,9 @@ static inline void real_free(void *p)
 
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }

core/test/run-mem_range_is_reserved.c (+2 -1)

@@ -57,8 +57,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }

core/test/run-mem_region.c (+2 -1)

@@ -55,8 +55,9 @@ static inline void real_free(void *p)
 
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }

core/test/run-mem_region_init.c (+2 -1)

@@ -63,8 +63,9 @@ static inline char *skiboot_strdup(const char *str)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }

core/test/run-mem_region_next.c (+2 -1)

@@ -52,8 +52,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }

core/test/run-mem_region_release_unused.c (+2 -1)

@@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	l->lock_val++;
 }
 
core/test/run-mem_region_release_unused_noalloc.c (+2 -1)

@@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	l->lock_val++;
 }
 
core/test/run-mem_region_reservations.c (+2 -1)

@@ -57,8 +57,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }

core/test/run-msg.c (+2 -1)

@@ -41,8 +41,9 @@ static void *zalloc(size_t size)
 #include "../opal-msg.c"
 #include <skiboot.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }

core/test/run-timer.c (+5 -1)

@@ -12,7 +12,11 @@
 
 static uint64_t stamp, last;
 struct lock;
-static inline void lock(struct lock *l) { (void)l; }
+static inline void lock_caller(struct lock *l, const char *caller)
+{
+	(void)caller;
+	(void)l;
+}
 static inline void unlock(struct lock *l) { (void)l; }
 
 unsigned long tb_hz = 512000000;

core/test/run-trace.c (+2 -1)

@@ -113,8 +113,9 @@ struct debug_descriptor debug_descriptor = {
 	.trace_mask = -1
 };
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }

core/timebase.c (+1 -1)

@@ -53,7 +53,7 @@ void time_wait(unsigned long duration)
 {
 	struct cpu_thread *c = this_cpu();
 
-	if (this_cpu()->lock_depth) {
+	if (!list_empty(&this_cpu()->locks_held)) {
 		time_wait_nopoll(duration);
 		return;
 	}

coverity-model.c (+2 -1)

@@ -10,7 +10,8 @@ void mem_free(struct mem_region *region, void *mem, const char *location) {
 	__coverity_free__(mem);
 }
 
-void lock(struct lock *l) {
+void lock_caller(struct lock *l, const char *caller)
+{
 	__coverity_exclusive_lock_acquire__(l);
 }
 
hdata/test/stubs.c (+1 -1)

@@ -109,7 +109,7 @@ static bool false_stub(void) { return false; }
 #define NOOP_STUB FALSE_STUB
 
 TRUE_STUB(lock_held_by_me);
-NOOP_STUB(lock);
+NOOP_STUB(lock_caller);
 NOOP_STUB(unlock);
 NOOP_STUB(early_uart_init);
 NOOP_STUB(mem_reserve_fw);

include/cpu.h (+1 -1)

@@ -61,8 +61,8 @@ struct cpu_thread {
 	uint64_t save_r1;
 	void *icp_regs;
 	uint32_t in_opal_call;
-	uint32_t lock_depth;
 	uint32_t con_suspend;
+	struct list_head locks_held;
 	bool con_need_flush;
 	bool quiesce_opal_call;
 	bool in_mcount;

include/lock.h (+28 -5)

@@ -20,6 +20,8 @@
 #include <stdbool.h>
 #include <processor.h>
 #include <cmpxchg.h>
+#include <ccan/list/list.h>
+#include <ccan/str/str.h>
 
 struct lock {
 	/* Lock value has bit 63 as lock bit and the PIR of the owner
@@ -32,10 +34,19 @@ struct lock {
 	 * in which case taking it will suspend console flushing
 	 */
 	bool in_con_path;
+
+	/* file/line of lock owner */
+	const char *owner;
+
+	/* linkage in per-cpu list of owned locks */
+	struct list_node list;
 };
 
-/* Initializer */
-#define LOCK_UNLOCKED { .lock_val = 0, .in_con_path = 0 }
+/* Initializer... not ideal but works for now. If we need different
+ * values for the fields and/or start getting warnings we'll have to
+ * play macro tricks
+ */
+#define LOCK_UNLOCKED { 0 }
 
 /* Note vs. libc and locking:
  *
@@ -65,8 +76,14 @@ static inline void init_lock(struct lock *l)
 	*l = (struct lock)LOCK_UNLOCKED;
 }
 
-extern bool try_lock(struct lock *l);
-extern void lock(struct lock *l);
+#define LOCK_CALLER __FILE__ ":" stringify(__LINE__)
+
+#define try_lock(l) try_lock_caller(l, LOCK_CALLER)
+#define lock(l) lock_caller(l, LOCK_CALLER)
+#define lock_recursive(l) lock_recursive_caller(l, LOCK_CALLER)
+
+extern bool try_lock_caller(struct lock *l, const char *caller);
+extern void lock_caller(struct lock *l, const char *caller);
 extern void unlock(struct lock *l);
 
 extern bool lock_held_by_me(struct lock *l);
@@ -77,9 +94,15 @@ extern bool lock_held_by_me(struct lock *l);
  * returns false if the lock was already held by this cpu. If it returns
  * true, then the caller shall release it when done.
  */
-extern bool lock_recursive(struct lock *l);
+extern bool lock_recursive_caller(struct lock *l, const char *caller);
 
 /* Called after per-cpu data structures are available */
 extern void init_locks(void);
 
+/* Dump the list of locks held by this CPU */
+extern void dump_locks_list(void);
+
+/* Clean all locks held by CPU (and warn if any) */
+extern void drop_my_locks(bool warn);
+
 #endif /* __LOCK_H */
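
The macro changes above mean existing lock()/try_lock()/lock_recursive() call sites compile unchanged while silently gaining a "file:line" owner string. A standalone illustration of that expansion follows; the two-step stringify() is spelled out locally here (skiboot pulls it from ccan/str/str.h), and the lock name and file/line in the comments are hypothetical.

#include <stdio.h>

#define stringify_1(x) #x
#define stringify(x) stringify_1(x)
#define LOCK_CALLER __FILE__ ":" stringify(__LINE__)
#define lock(l) lock_caller((l), LOCK_CALLER)

struct lock {
	int lock_val;
	const char *owner;
};

static void lock_caller(struct lock *l, const char *owner)
{
	l->lock_val = 1;
	l->owner = owner;	/* e.g. "example.c:24" */
}

int main(void)
{
	struct lock some_lock = { 0, NULL };	/* illustrative lock */

	/* expands to lock_caller(&some_lock, "example.c" ":" "<line>") */
	lock(&some_lock);
	printf("owner = %s\n", some_lock.owner);
	return 0;
}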
