Skip to content

Commit e5886e3

Browse files
tejasupintel-lab-lkp
authored and committed
drm/xe/guc: Use exec queue hints for GT frequency
Allow the user to provide a low latency hint per exec queue. When set, KMD sends a hint to GuC which results in special handling for this exec queue. SLPC will ramp the GT frequency aggressively every time it switches to this exec queue. We need to enable the use of the SLPC Compute strategy during init, but it will apply only to exec queues that set this bit during exec queue creation. Improvement with this approach is as below: Before, :~$ NEOReadDebugKeys=1 EnableDirectSubmission=0 clpeak --kernel-latency Platform: Intel(R) OpenCL Graphics Device: Intel(R) Graphics [0xe20b] Driver version : 24.52.0 (Linux x64) Compute units : 160 Clock frequency : 2850 MHz Kernel launch latency : 283.16 us After, :~$ NEOReadDebugKeys=1 EnableDirectSubmission=0 clpeak --kernel-latency Platform: Intel(R) OpenCL Graphics Device: Intel(R) Graphics [0xe20b] Driver version : 24.52.0 (Linux x64) Compute units : 160 Clock frequency : 2850 MHz Kernel launch latency : 63.38 us UMD will indicate the low latency hint with a flag as mentioned below, * struct drm_xe_exec_queue_create exec_queue_create = { * .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT or 0 * .extensions = 0, * .vm_id = vm, * .num_bb_per_exec = 1, * .num_eng_per_bb = 1, * .instances = to_user_pointer(&instance), * }; * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); Link to UMD PR : intel/compute-runtime#794 Note: There is an outstanding issue on the GuC side where it is not able to switch to max frequency as per the strategy indicated by the KMD, so for experiment/test results a hardcoded approach was taken and passed to GuC as policy. Debugging effort on the GuC side is going on in parallel. V2: - DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT 1 is already planned for another hint (Szymon) - Add motivation to description (Lucas) Cc: [email protected] Cc: [email protected] Cc: Michal Mrozek <[email protected]> Cc: Szymon Morek <[email protected]> Cc: José Roberto de Souza <[email protected]> Signed-off-by: Tejas Upadhyay <[email protected]>
1 parent 3259ff4 commit e5886e3

File tree

5 files changed

+32
-4
lines changed

5 files changed

+32
-4
lines changed

drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ struct slpc_task_state_data {
174174
};
175175
} __packed;
176176

177+
#define SLPC_EXEC_QUEUE_FREQ_REQ_IS_COMPUTE REG_BIT(28)
178+
#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
179+
177180
struct slpc_shared_data_header {
178181
/* Total size in bytes of this shared buffer. */
179182
u32 size;

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
553553
u32 len;
554554
int err;
555555

556-
if (XE_IOCTL_DBG(xe, args->flags) ||
556+
if (XE_IOCTL_DBG(xe, args->flags &&
557+
!(args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)) ||
557558
XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
558559
return -EINVAL;
559560

@@ -578,7 +579,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
578579

579580
for_each_tile(tile, xe, id) {
580581
struct xe_exec_queue *new;
581-
u32 flags = EXEC_QUEUE_FLAG_VM;
582+
u32 flags = args->flags | EXEC_QUEUE_FLAG_VM;
582583

583584
if (id)
584585
flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
@@ -626,7 +627,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
626627
}
627628

628629
q = xe_exec_queue_create(xe, vm, logical_mask,
629-
args->width, hwe, 0,
630+
args->width, hwe, args->flags,
630631
args->extensions);
631632
up_read(&vm->lock);
632633
xe_vm_put(vm);

drivers/gpu/drm/xe/xe_guc_pc.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,19 @@ static int pc_init_freqs(struct xe_guc_pc *pc)
992992
return ret;
993993
}
994994

995+
static int xe_guc_pc_set_strategy(struct xe_guc_pc *pc, u32 val)
996+
{
997+
int ret = 0;
998+
999+
xe_pm_runtime_get(pc_to_xe(pc));
1000+
ret = pc_action_set_param(pc,
1001+
SLPC_PARAM_STRATEGIES,
1002+
val);
1003+
xe_pm_runtime_put(pc_to_xe(pc));
1004+
1005+
return ret;
1006+
}
1007+
9951008
/**
9961009
* xe_guc_pc_start - Start GuC's Power Conservation component
9971010
* @pc: Xe_GuC_PC instance
@@ -1052,6 +1065,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
10521065

10531066
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
10541067

1068+
/* Enable SLPC Optimized Strategy for compute */
1069+
xe_guc_pc_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
1070+
10551071
out:
10561072
xe_force_wake_put(gt_to_fw(gt), fw_ref);
10571073
return ret;

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <drm/drm_managed.h>
1616

1717
#include "abi/guc_actions_abi.h"
18+
#include "abi/guc_actions_slpc_abi.h"
1819
#include "abi/guc_klvs_abi.h"
1920
#include "regs/xe_lrc_layout.h"
2021
#include "xe_assert.h"
@@ -400,6 +401,7 @@ static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy,
400401
MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
401402
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
402403
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
404+
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
403405
#undef MAKE_EXEC_QUEUE_POLICY_ADD
404406

405407
static const int xe_exec_queue_prio_to_guc[] = {
@@ -414,14 +416,19 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
414416
struct exec_queue_policy policy;
415417
enum xe_exec_queue_priority prio = q->sched_props.priority;
416418
u32 timeslice_us = q->sched_props.timeslice_us;
419+
u32 slpc_ctx_freq_req = 0;
417420
u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
418421

419422
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
420423

424+
if (q->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
425+
slpc_ctx_freq_req |= SLPC_EXEC_QUEUE_FREQ_REQ_IS_COMPUTE;
426+
421427
__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
422428
__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
423429
__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
424430
__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
431+
__guc_exec_queue_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
425432

426433
xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
427434
__guc_exec_queue_policy_action_size(&policy), 0, 0);

include/uapi/drm/xe_drm.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,7 @@ struct drm_xe_vm_bind {
10971097
* .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
10981098
* };
10991099
* struct drm_xe_exec_queue_create exec_queue_create = {
1100+
* .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT or 0
11001101
* .extensions = 0,
11011102
* .vm_id = vm,
11021103
* .num_bb_per_exec = 1,
@@ -1110,7 +1111,6 @@ struct drm_xe_exec_queue_create {
11101111
#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
11111112
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
11121113
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
1113-
11141114
/** @extensions: Pointer to the first extension struct, if any */
11151115
__u64 extensions;
11161116

@@ -1123,6 +1123,7 @@ struct drm_xe_exec_queue_create {
11231123
/** @vm_id: VM to use for this exec queue */
11241124
__u32 vm_id;
11251125

1126+
#define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (0x1 << 1)
11261127
/** @flags: MBZ */
11271128
__u32 flags;
11281129

0 commit comments

Comments
 (0)