From 6772233eab3bb118fb5c173fd47dea94bb69db42 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Sat, 11 Sep 2021 21:25:56 -0500 Subject: [PATCH 1/3] Coreforge's patch replacing fromio and toio with 32-bit compatible functions. --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 96 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 8 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 4 +- 9 files changed, 118 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8ac6eb9f1fdb8a..87b3a89d374f2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1144,6 +1144,13 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, int emu_soc_asic_init(struct amdgpu_device *adev); +/* + * memcpy_io and memset_io functions that work on a raspberry pi 4 + */ +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count); +void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count); +void memset_io_pcie(volatile void __iomem *dst, int c, size_t count); + /* * Registers read & write functions. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 27b19503773b93..37f374c6a75fda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -114,7 +114,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev) return false; } adev->bios_size = size; - memcpy_fromio(adev->bios, bios, size); + memcpy_fromio_pcie(adev->bios, bios, size); iounmap(bios); if (!check_atom_bios(adev->bios, size)) { @@ -143,7 +143,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev) return false; } adev->bios_size = size; - memcpy_fromio(adev->bios, bios, size); + memcpy_fromio_pcie(adev->bios, bios, size); pci_unmap_rom(adev->pdev, bios); if (!check_atom_bios(adev->bios, size)) { @@ -213,7 +213,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev) if (!bios) goto free_bios; - memcpy_fromio(adev->bios, bios, romlen); + memcpy_fromio_pcie(adev->bios, bios, romlen); iounmap(bios); if (!check_atom_bios(adev->bios, romlen)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f944ed858f3e7f..ec7a69c6d53733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -127,6 +127,98 @@ const char *amdgpu_asic_name[] = { "LAST", }; +/** + * DOC: memcpy_fromio_pcie + * + * like memcpy_fromio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)from, 8)) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } + + while (count >= 4) { + *(u32 *)to = __raw_readl(from); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } +} + +/** + * DOC: memcpy_toio_pcie + * + * like memcpy_toio, but it only uses 8-bit and 32-bit wide accesses 
to work on a raspberry pi 4 + */ + +void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)to, 8)) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } + + while (count >= 4) { + __raw_writel(*(u32 *)from, to); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } +} + +/** + * DOC: memset_io_pcie + * + * like memset_io, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memset_io_pcie(volatile void __iomem *dst, int c, size_t count) +{ + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} + /** * DOC: pcie_replay_count * @@ -313,13 +405,13 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, size_t count = last - pos; if (write) { - memcpy_toio(addr, buf, count); + memcpy_toio_pcie(addr, buf, count); mb(); amdgpu_device_flush_hdp(adev, NULL); } else { amdgpu_device_invalidate_hdp(adev, NULL); mb(); - memcpy_fromio(buf, addr, count); + memcpy_fromio_pcie(buf, addr, count); } if (count == size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3ec5099ffeb6c7..9b492861ab3656 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3261,7 +3261,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (ret) goto rel_buf; - memcpy_toio(cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); + memcpy_toio_pcie(cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); /* * x86 specific workaround. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 0f576f294d8a5f..5fd69a947650a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -409,7 +409,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (in_ras_intr) memset(adev->uvd.inst[j].saved_bo, 0, size); else - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + memcpy_fromio_pcie(adev->uvd.inst[j].saved_bo, ptr, size); drm_dev_exit(idx); } @@ -438,7 +438,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) if (adev->uvd.inst[i].saved_bo != NULL) { if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + memcpy_toio_pcie(ptr, adev->uvd.inst[i].saved_bo, size); drm_dev_exit(idx); } kvfree(adev->uvd.inst[i].saved_bo); @@ -451,14 +451,14 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, + memcpy_toio_pcie(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } size -= le32_to_cpu(hdr->ucode_size_bytes); ptr += le32_to_cpu(hdr->ucode_size_bytes); } - memset_io(ptr, 0, size); + memset_io_pcie(ptr, 0, size); /* to restore uvd fence seq */ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1ae7f824adc7ac..86fc4030db2f35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -316,7 +316,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev) offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_toio(cpu_addr, adev->vce.fw->data + offset, + memcpy_toio_pcie(cpu_addr, adev->vce.fw->data + 
offset, adev->vce.fw->size - offset); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 6780df0fb26554..fedc7a36e6a139 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -333,7 +333,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) return -ENOMEM; if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + memcpy_fromio_pcie(adev->vcn.inst[i].saved_bo, ptr, size); drm_dev_exit(idx); } } @@ -357,7 +357,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) if (adev->vcn.inst[i].saved_bo != NULL) { if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + memcpy_toio_pcie(ptr, adev->vcn.inst[i].saved_bo, size); drm_dev_exit(idx); } kvfree(adev->vcn.inst[i].saved_bo); @@ -370,14 +370,14 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + memcpy_toio_pcie(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } size -= le32_to_cpu(hdr->ucode_size_bytes); ptr += le32_to_cpu(hdr->ucode_size_bytes); } - memset_io(ptr, 0, size); + memset_io_pcie(ptr, 0, size); } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bc133db2d538b2..4da7574e48000f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -689,7 +689,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } if (drm_dev_enter(&adev->ddev, &idx)) { - memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + memcpy_fromio_pcie(buf, adev->mman.aper_base_kaddr, sz); ret = psp_v11_0_memory_training_send_msg(psp, 
PSP_BL__DRAM_LONG_TRAIN); if (ret) { DRM_ERROR("Send long training msg failed.\n"); @@ -698,7 +698,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return ret; } - memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + memcpy_toio_pcie(adev->mman.aper_base_kaddr, buf, sz); adev->hdp.funcs->flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 90910d19db122f..340f3d4645f59a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -566,7 +566,7 @@ static int vce_v4_0_suspend(void *handle) unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); void *ptr = adev->vce.cpu_addr; - memcpy_fromio(adev->vce.saved_bo, ptr, size); + memcpy_fromio_pcie(adev->vce.saved_bo, ptr, size); } drm_dev_exit(idx); } @@ -592,7 +592,7 @@ static int vce_v4_0_resume(void *handle) unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); void *ptr = adev->vce.cpu_addr; - memcpy_toio(ptr, adev->vce.saved_bo, size); + memcpy_toio_pcie(ptr, adev->vce.saved_bo, size); drm_dev_exit(idx); } } else { From d1ad02b0a02a97318d33d45ab6574cf2c3805529 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Mon, 13 Sep 2021 09:34:46 -0500 Subject: [PATCH 2/3] Fix whitespace errors. 
--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ec7a69c6d53733..e7714fc485f021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -132,7 +132,7 @@ const char *amdgpu_asic_name[] = { * * like memcpy_fromio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 */ - + void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count) { while (count && !IS_ALIGNED((unsigned long)from, 8)) { @@ -162,7 +162,7 @@ void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t coun * * like memcpy_toio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 */ - + void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count) { while (count && !IS_ALIGNED((unsigned long)to, 8)) { @@ -192,7 +192,7 @@ void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count) * * like memset_io, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 */ - + void memset_io_pcie(volatile void __iomem *dst, int c, size_t count) { u32 qc = (u8)c; From 11e5a855327380470f96627c9e711b77b75fa709 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Wed, 15 Sep 2021 12:52:05 -0500 Subject: [PATCH 3/3] Replace a number of memcpy/memset with proper functions. Debug. 
--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 ++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 38 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 12 +++---- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 ++- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 4 +-- 10 files changed, 67 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e7714fc485f021..c7224333198f87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1169,10 +1169,10 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) } adev->wb.num_wb = AMDGPU_MAX_WB; - memset(&adev->wb.used, 0, sizeof(adev->wb.used)); + memset_io_pcie(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); + memset_io_pcie((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); } return 0; @@ -2319,6 +2319,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) int i; uint32_t smu_version; + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); + msleep(500); + if (adev->asic_type >= CHIP_VEGA10) { for (i = 0; i < adev->num_ip_blocks; i++) { if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) @@ -2339,7 +2342,17 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) return r; } } else { + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); + printk(KERN_ALERT "DEBUG: On IP block <%s>\n", adev->ip_blocks[i].version->funcs->name); + printk(KERN_ALERT "DEBUG: On IP block %d \n",i); + msleep(500); + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + + printk(KERN_ALERT "DEBUG: Passed 
%s %d \n",__FUNCTION__,__LINE__); + msleep(500); + if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2378,6 +2391,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) return r; for (i = 0; i < adev->num_ip_blocks; i++) { + printk(KERN_ALERT "DEBUG: On IP block <%s>\n", adev->ip_blocks[i].version->funcs->name); + msleep(500); + if (!adev->ip_blocks[i].status.valid) continue; r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); @@ -2434,14 +2450,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); + msleep(500); + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); + msleep(500); + r = amdgpu_device_fw_loading(adev); if (r) goto init_failed; + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); + msleep(500); + r = amdgpu_device_ip_hw_init_phase2(adev); if (r) goto init_failed; @@ -4944,7 +4969,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int tmp_vram_lost_counter; struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); + memset_io_pcie(&reset_context, 0, sizeof(reset_context)); /* * Special case: RAS triggered and full reset isn't supported @@ -5483,7 +5508,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) DRM_INFO("PCI error: slot reset callback!!\n"); - memset(&reset_context, 0, sizeof(reset_context)); + memset_io_pcie(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); list_add_tail(&adev->reset_list, &device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b4ced45301becd..4f9829e7d3af9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -147,7 +147,7 @@ void 
amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s unsigned se, sh, cu; const char *p; - memset(mask, 0, sizeof(*mask) * max_se * max_sh); + memset_io_pcie(mask, 0, sizeof(*mask) * max_se * max_sh); if (!amdgpu_disable_cu || !*amdgpu_disable_cu) return; @@ -345,7 +345,7 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, return r; } - memset(hpd, 0, hpd_size); + memset_io_pcie(hpd, 0, hpd_size); r = amdgpu_bo_reserve(kiq->eop_obj, true); if (unlikely(r != 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9b492861ab3656..f24fb99a3a81d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -261,7 +261,7 @@ static int psp_sw_init(void *handle) } } - memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry)); + memset_io_pcie(&boot_cfg_entry, 0, sizeof(boot_cfg_entry)); if (psp_get_runtime_db_entry(adev, PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG, &boot_cfg_entry)) { @@ -375,7 +375,7 @@ psp_cmd_submit_buf(struct psp_context *psp, mutex_lock(&psp->mutex); - memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); + memset_io_pcie(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); @@ -654,7 +654,7 @@ static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) if (amdgpu_sriov_vf(adev)) return 0; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + memset_io_pcie(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET; @@ -676,7 +676,7 @@ static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg) if (amdgpu_sriov_vf(adev)) return 0; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + memset_io_pcie(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET; @@ -694,10 +694,10 @@ static int psp_rl_load(struct amdgpu_device *adev) if 
(psp->rl_bin_size == 0) return 0; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memset_io_pcie(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->rl_start_addr, psp->rl_bin_size); - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + memset_io_pcie(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr); @@ -993,7 +993,7 @@ int psp_xgmi_initialize(struct psp_context *psp) /* Initialize XGMI session */ xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + memset_io_pcie(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); @@ -1007,7 +1007,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) int ret; xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + memset_io_pcie(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; @@ -1027,7 +1027,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) int ret; xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + memset_io_pcie(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; @@ -1055,7 +1055,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, return -EINVAL; xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + memset_io_pcie(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -1099,7 +1099,7 @@ int 
psp_xgmi_set_topology_info(struct psp_context *psp, return -EINVAL; xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; - memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + memset_io_pcie(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; @@ -1283,7 +1283,7 @@ int psp_ras_enable_features(struct psp_context *psp, return -EINVAL; ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + memset_io_pcie(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); if (enable) ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; @@ -1408,7 +1408,7 @@ int psp_ras_trigger_error(struct psp_context *psp, return -EINVAL; ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + memset_io_pcie(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; ras_cmd->ras_in_message.trigger_error = *info; @@ -1875,7 +1875,7 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_stat rap_cmd = (struct ta_rap_shared_memory *) psp->rap_context.rap_shared_buf; - memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory)); + memset_io_pcie(rap_cmd, 0, sizeof(struct ta_rap_shared_memory)); rap_cmd->cmd_id = ta_cmd_id; rap_cmd->validation_method_id = METHOD_A; @@ -1921,7 +1921,7 @@ static int psp_securedisplay_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memset_io_pcie(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_securedisplay_start_addr, psp->ta_securedisplay_ucode_size); psp_prep_ta_load_cmd_buf(cmd, @@ -2299,7 +2299,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, int ret; uint64_t fw_mem_mc_addr = ucode->mc_addr; - memset(cmd, 0, sizeof(struct 
psp_gfx_cmd_resp)); + memset_io_pcie(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); @@ -2514,7 +2514,7 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ret) goto failed; - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); + memset_io_pcie(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) { @@ -2862,7 +2862,7 @@ int psp_ring_cmd_submit(struct psp_context *psp, } /* Initialize KM RB frame */ - memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + memset_io_pcie(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); /* Update KM RB frame */ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); @@ -3297,7 +3297,7 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size if (!drm_dev_enter(&psp->adev->ddev, &idx)) return; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memset_io_pcie(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, start_addr, bin_size); drm_dev_exit(idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 524d10b2104129..fa28f28e975aec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -70,7 +70,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, return r; } - memset(sa_manager->cpu_ptr, 0, sa_manager->size); + memset_io_pcie(sa_manager->cpu_ptr, 0, sa_manager->size); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3a55f08e00e1df..340fffc0e8e510 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1099,6 +1099,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, else caching = ttm_cached; + // Test disabling cache entirely. 
+ caching = ttm_uncached; + /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) { kfree(gtt); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a64b2c706090ea..f6bb29f4e8a6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3827,7 +3827,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - memset(&ib, 0, sizeof(ib)); + memset_io_pcie(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); if (r) @@ -4438,7 +4438,7 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev) return r; } - memset(hpd, 0, mec_hpd_size); + memset_io_pcie(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -5488,7 +5488,7 @@ static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *ade memcpy(ptr + toc_offset, fw_data, fw_size); if (fw_size < toc_fw_size) - memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); + memset_io_pcie(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); } static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) @@ -6643,7 +6643,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { - memset((void *)mqd, 0, sizeof(*mqd)); + memset_io_pcie((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v10_0_gfx_mqd_init(ring); @@ -7022,7 +7022,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { - memset((void *)mqd, 0, sizeof(*mqd)); + memset_io_pcie((void *)mqd, 0, sizeof(*mqd)); 
mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v10_0_compute_mqd_init(ring); @@ -7044,7 +7044,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!amdgpu_in_reset(adev) && !adev->in_suspend) { - memset((void *)mqd, 0, sizeof(*mqd)); + memset_io_pcie((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v10_0_compute_mqd_init(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 4523df2785d633..8cbd2616eedd17 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -142,7 +142,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + memset_io_pcie(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94d029dbf30da5..dfcc32bf146112 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -803,7 +803,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) #endif amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + // amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && is_support_sw_smu(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 4da7574e48000f..fc6ed7b87ef2af 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -617,7 +617,9 @@ static int psp_v11_0_memory_training(struct 
psp_context *psp, uint32_t ops) void *buf; int ret, idx; - if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + // Disable memory training because it causes hard lockup on Pi. + // if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + if (true) { DRM_DEBUG("Memory training is not supported.\n"); return 0; } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 7486e530678679..b8819520084537 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -125,7 +125,7 @@ static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) { release_firmware(adev->sdma.instance[0].fw); - memset((void *)adev->sdma.instance, 0, + memset_io_pcie((void *)adev->sdma.instance, 0, sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); } @@ -987,7 +987,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - memset(&ib, 0, sizeof(ib)); + memset_io_pcie(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);