Skip to content

Commit 9c2136b

Browse files
Delyan Kratunov authored and Peter Zijlstra committed
sched/tracing: Append prev_state to tp args instead
Commit fa2c325 (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) added a new prev_state argument to the sched_switch tracepoint, before the prev task_struct pointer.

This reordering of arguments broke BPF programs that use the raw tracepoint (e.g. tp_btf programs). The type of the second argument has changed and existing programs that assume a task_struct* argument (e.g. for bpf_task_storage access) will now fail to verify.

If we instead append the new argument to the end, all existing programs would continue to work and can conditionally extract the prev_state argument on supported kernel versions.

Fixes: fa2c325 (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20)
Signed-off-by: Delyan Kratunov <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Steven Rostedt (Google) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent c5eb0a6 commit 9c2136b

File tree

9 files changed

+21
-21
lines changed

9 files changed

+21
-21
lines changed

include/trace/events/sched.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
222222
TRACE_EVENT(sched_switch,
223223

224224
TP_PROTO(bool preempt,
225-
unsigned int prev_state,
226225
struct task_struct *prev,
227-
struct task_struct *next),
226+
struct task_struct *next,
227+
unsigned int prev_state),
228228

229-
TP_ARGS(preempt, prev_state, prev, next),
229+
TP_ARGS(preempt, prev, next, prev_state),
230230

231231
TP_STRUCT__entry(
232232
__array( char, prev_comm, TASK_COMM_LEN )

kernel/sched/core.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
63826382
migrate_disable_switch(rq, prev);
63836383
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
63846384

6385-
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
6385+
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
63866386

63876387
/* Also unlocks the rq: */
63886388
rq = context_switch(rq, prev, next, &rf);

kernel/trace/fgraph.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
404404

405405
static void
406406
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
407-
unsigned int prev_state,
408407
struct task_struct *prev,
409-
struct task_struct *next)
408+
struct task_struct *next,
409+
unsigned int prev_state)
410410
{
411411
unsigned long long timestamp;
412412
int index;

kernel/trace/ftrace.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
74207420

74217421
static void
74227422
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
7423-
unsigned int prev_state,
74247423
struct task_struct *prev,
7425-
struct task_struct *next)
7424+
struct task_struct *next,
7425+
unsigned int prev_state)
74267426
{
74277427
struct trace_array *tr = data;
74287428
struct trace_pid_list *pid_list;

kernel/trace/trace_events.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
773773

774774
static void
775775
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
776-
unsigned int prev_state,
777776
struct task_struct *prev,
778-
struct task_struct *next)
777+
struct task_struct *next,
778+
unsigned int prev_state)
779779
{
780780
struct trace_array *tr = data;
781781
struct trace_pid_list *no_pid_list;
@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
799799

800800
static void
801801
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
802-
unsigned int prev_state,
803802
struct task_struct *prev,
804-
struct task_struct *next)
803+
struct task_struct *next,
804+
unsigned int prev_state)
805805
{
806806
struct trace_array *tr = data;
807807
struct trace_pid_list *no_pid_list;

kernel/trace/trace_osnoise.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
11681168
*/
11691169
static void
11701170
trace_sched_switch_callback(void *data, bool preempt,
1171-
unsigned int prev_state,
11721171
struct task_struct *p,
1173-
struct task_struct *n)
1172+
struct task_struct *n,
1173+
unsigned int prev_state)
11741174
{
11751175
struct osnoise_variables *osn_var = this_cpu_osn_var();
11761176

kernel/trace/trace_sched_switch.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
2222

2323
static void
2424
probe_sched_switch(void *ignore, bool preempt,
25-
unsigned int prev_state,
26-
struct task_struct *prev, struct task_struct *next)
25+
struct task_struct *prev, struct task_struct *next,
26+
unsigned int prev_state)
2727
{
2828
int flags;
2929

kernel/trace/trace_sched_wakeup.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
426426

427427
static void notrace
428428
probe_wakeup_sched_switch(void *ignore, bool preempt,
429-
unsigned int prev_state,
430-
struct task_struct *prev, struct task_struct *next)
429+
struct task_struct *prev, struct task_struct *next,
430+
unsigned int prev_state)
431431
{
432432
struct trace_array_cpu *data;
433433
u64 T0, T1, delta;

samples/trace_events/trace_custom_sched.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
2525
* that the custom event is using.
2626
*/
2727
TP_PROTO(bool preempt,
28-
unsigned int prev_state,
2928
struct task_struct *prev,
30-
struct task_struct *next),
29+
struct task_struct *next,
30+
unsigned int prev_state),
3131

32-
TP_ARGS(preempt, prev_state, prev, next),
32+
TP_ARGS(preempt, prev, next, prev_state),
3333

3434
/*
3535
* The next fields are where the customization happens.

0 commit comments

Comments
 (0)