Skip to content

Commit 578d9c8

Browse files
tejasupjunxiaoc
authored and committed
drm/xe/mmap: Add mmap support for PCI memory barrier
In order to avoid having userspace to use MI_MEM_FENCE, we are adding a mechanism for userspace to generate a PCI memory barrier with low overhead (avoiding IOCTL call as well as writing to VRAM will adds some overhead). This is implemented by memory-mapping a page as uncached that is backed by MMIO on the dGPU and thus allowing userspace to do memory write to the page without invoking an IOCTL. We are selecting the MMIO so that it is not accessible from the PCI bus so that the MMIO writes themselves are ignored, but the PCI memory barrier will still take action as the MMIO filtering will happen after the memory barrier effect. When we detect special defined offset in mmap(), We are mapping 4K page which contains the last of page of doorbell MMIO range to userspace for same purpose. For user to query special offset we are adding special flag in mmap_offset ioctl which needs to be passed as follows, struct drm_xe_gem_mmap_offset mmo = { .handle = 0, /* this must be 0 */ .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, }; igt_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo); IGT : https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/commit/b2dbc6f22815128c0dd5c737504f42e1f1a6ad62 UMD : intel/compute-runtime#772 V7: - Dgpu filter added V6(MAuld) - Move physical mmap to fault handler - Modify kernel-doc and attach UMD PR when ready V5(MAuld) - Return invalid early in case of non 4K PAGE_SIZE - Format kernel-doc and add note for 4K PAGE_SIZE HW limit V4(MAuld) - Add kernel-doc for uapi change - Restrict page size to 4K V3(MAuld) - Remove offset defination from UAPI to be able to change later - Edit commit message for special flag addition V2(MAuld) - Add fault handler with dummy page to handle unplug device - Add Build check for special offset to be below normal start page - Test d3hot, mapping seems to be valid in d3hot as well - Add more info to commit message Cc: Matthew Auld <[email protected]> Acked-by: Michal Mrozek 
<[email protected]> Reviewed-by: Matthew Auld <[email protected]> Signed-off-by: Tejas Upadhyay <[email protected]> Signed-off-by: Matthew Auld <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 16cdfcf commit 578d9c8

File tree

4 files changed

+154
-3
lines changed

4 files changed

+154
-3
lines changed

drivers/gpu/drm/xe/xe_bo.c

+18-1
Original file line numberDiff line numberDiff line change
@@ -2278,9 +2278,26 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
22782278
XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
22792279
return -EINVAL;
22802280

2281-
if (XE_IOCTL_DBG(xe, args->flags))
2281+
if (XE_IOCTL_DBG(xe, args->flags &
2282+
~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
22822283
return -EINVAL;
22832284

2285+
if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2286+
if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2287+
return -EINVAL;
2288+
2289+
if (XE_IOCTL_DBG(xe, args->handle))
2290+
return -EINVAL;
2291+
2292+
if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2293+
return -EINVAL;
2294+
2295+
BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2296+
SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2297+
args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2298+
return 0;
2299+
}
2300+
22842301
gem_obj = drm_gem_object_lookup(file, args->handle);
22852302
if (XE_IOCTL_DBG(xe, !gem_obj))
22862303
return -ENOENT;

drivers/gpu/drm/xe/xe_bo.h

+2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575

7676
#define XE_BO_PROPS_INVALID (-1)
7777

78+
#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT)
79+
7880
struct sg_table;
7981

8082
struct xe_bo *xe_bo_alloc(void);

drivers/gpu/drm/xe/xe_device.c

+106-1
Original file line numberDiff line numberDiff line change
@@ -232,12 +232,117 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
232232
#define xe_drm_compat_ioctl NULL
233233
#endif
234234

235+
static void barrier_open(struct vm_area_struct *vma)
236+
{
237+
drm_dev_get(vma->vm_private_data);
238+
}
239+
240+
static void barrier_close(struct vm_area_struct *vma)
241+
{
242+
drm_dev_put(vma->vm_private_data);
243+
}
244+
245+
/*
 * drmm release action: free the dummy page that barrier_fault() installed
 * after the device was unplugged.
 */
static void barrier_release_dummy_page(struct drm_device *dev, void *res)
{
	__free_page((struct page *)res);
}
251+
252+
/*
 * Fault handler for the PCI-barrier mapping.
 *
 * While the device is alive, back the single-page VMA with an uncached
 * mapping of MMIO in BAR0 so that a CPU write generates a PCI memory
 * barrier. If the device is gone (unplugged), fall back to a zeroed
 * dummy page so existing userspace mappings remain usable.
 */
static vm_fault_t barrier_fault(struct vm_fault *vmf)
{
	struct drm_device *dev = vmf->vma->vm_private_data;
	struct vm_area_struct *vma = vmf->vma;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	pgprot_t prot;
	int idx;

	prot = vm_get_page_prot(vma->vm_flags);

	if (drm_dev_enter(dev, &idx)) {
		unsigned long pfn;

/* Byte offset into BAR0 of the last doorbell page; PHYS_PFN() truncates
 * it to the containing page frame. NOTE(review): presumably chosen so the
 * MMIO writes themselves are filtered by HW — confirm against the spec.
 */
#define LAST_DB_PAGE_OFFSET 0x7ff001
		pfn = PHYS_PFN(pci_resource_start(to_pci_dev(dev->dev), 0) +
				LAST_DB_PAGE_OFFSET);
		/* Uncached so the write reaches the PCI bus immediately. */
		ret = vmf_insert_pfn_prot(vma, vma->vm_start, pfn,
					  pgprot_noncached(prot));
		drm_dev_exit(idx);
	} else {
		struct page *page;

		/* Allocate new dummy page to map all the VA range in this VMA to it */
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return VM_FAULT_OOM;

		/* Set the page to be freed using drmm release action */
		if (drmm_add_action_or_reset(dev, barrier_release_dummy_page, page))
			return VM_FAULT_OOM;

		ret = vmf_insert_pfn_prot(vma, vma->vm_start, page_to_pfn(page),
					  prot);
	}

	return ret;
}
289+
290+
/*
 * VM operations for the PCI-barrier mapping: keep the drm_device refcounted
 * across VMA duplication and populate the page lazily on first fault.
 */
static const struct vm_operations_struct vm_ops_barrier = {
	.open = barrier_open,
	.close = barrier_close,
	.fault = barrier_fault,
};
295+
296+
static int xe_pci_barrier_mmap(struct file *filp,
297+
struct vm_area_struct *vma)
298+
{
299+
struct drm_file *priv = filp->private_data;
300+
struct drm_device *dev = priv->minor->dev;
301+
struct xe_device *xe = to_xe_device(dev);
302+
303+
if (!IS_DGFX(xe))
304+
return -EINVAL;
305+
306+
if (vma->vm_end - vma->vm_start > SZ_4K)
307+
return -EINVAL;
308+
309+
if (is_cow_mapping(vma->vm_flags))
310+
return -EINVAL;
311+
312+
if (vma->vm_flags & (VM_READ | VM_EXEC))
313+
return -EINVAL;
314+
315+
vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC);
316+
vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
317+
vma->vm_ops = &vm_ops_barrier;
318+
vma->vm_private_data = dev;
319+
drm_dev_get(vma->vm_private_data);
320+
321+
return 0;
322+
}
323+
324+
static int xe_mmap(struct file *filp, struct vm_area_struct *vma)
325+
{
326+
struct drm_file *priv = filp->private_data;
327+
struct drm_device *dev = priv->minor->dev;
328+
329+
if (drm_dev_is_unplugged(dev))
330+
return -ENODEV;
331+
332+
switch (vma->vm_pgoff) {
333+
case XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT:
334+
return xe_pci_barrier_mmap(filp, vma);
335+
}
336+
337+
return drm_gem_mmap(filp, vma);
338+
}
339+
235340
static const struct file_operations xe_driver_fops = {
236341
.owner = THIS_MODULE,
237342
.open = drm_open,
238343
.release = drm_release_noglobal,
239344
.unlocked_ioctl = xe_drm_ioctl,
240-
.mmap = drm_gem_mmap,
345+
.mmap = xe_mmap,
241346
.poll = drm_poll,
242347
.read = drm_read,
243348
.compat_ioctl = xe_drm_compat_ioctl,

include/uapi/drm/xe_drm.h

+28-1
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,32 @@ struct drm_xe_gem_create {
811811

812812
/**
813813
* struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET
814+
*
815+
* The @flags can be:
816+
* - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset
817+
* for use in mmap ioctl. Writing to the returned mmap address will generate a
818+
* PCI memory barrier with low overhead (avoiding IOCTL call as well as writing
819+
* to VRAM which would also add overhead), acting like an MI_MEM_FENCE
820+
* instruction.
821+
*
822+
* Note: The mmap size can be at most 4K, due to HW limitations. As a result
823+
* this interface is only supported on CPU architectures that support 4K page
824+
* size. The mmap_offset ioctl will detect this and gracefully return an
825+
* error, where userspace is expected to have a different fallback method for
826+
* triggering a barrier.
827+
*
828+
* Roughly the usage would be as follows:
829+
*
830+
* .. code-block:: C
831+
*
832+
* struct drm_xe_gem_mmap_offset mmo = {
833+
* .handle = 0, // must be set to 0
834+
* .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER,
835+
* };
836+
*
837+
* err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
838+
* map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset);
839+
* map[i] = 0xdeadbeaf; // issue barrier
814840
*/
815841
struct drm_xe_gem_mmap_offset {
816842
/** @extensions: Pointer to the first extension struct, if any */
@@ -819,7 +845,8 @@ struct drm_xe_gem_mmap_offset {
819845
/** @handle: Handle for the object being mapped. */
820846
__u32 handle;
821847

822-
/** @flags: Must be zero */
848+
#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0)
849+
/** @flags: Flags */
823850
__u32 flags;
824851

825852
/** @offset: The fake offset to use for subsequent mmap call */

0 commit comments

Comments
 (0)