summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org>, 2024-02-06 03:36:15 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org>, 2024-02-06 03:36:15 +0000
commit: 1ab1957d95a38e35958a4be9acbc250a2193f3bd (patch)
tree: 1934521508e03accb4d7ff24b77b9e2a8f7f6d1f
parent: ba3c7df36e116a2dc9e02b8a3fc8b7191301e4d5 (diff)
drm/amdgpu: Fix ecc irq enable/disable unpaired
From Stanley Yang
0a8fc4e007b933d46f079a1d9ab8539a4d8439ef in linux-6.6.y/6.6.16
a32c6f7f5737cc7e31cd7ad5133f0d96fca12ea6 in mainline linux
-rw-r--r-- sys/dev/pci/drm/amd/amdgpu/aldebaran.c | 26
-rw-r--r-- sys/dev/pci/drm/amd/amdgpu/gmc_v10_0.c | 4
-rw-r--r-- sys/dev/pci/drm/amd/amdgpu/gmc_v11_0.c | 5
-rw-r--r-- sys/dev/pci/drm/amd/amdgpu/gmc_v9_0.c | 4
4 files changed, 38 insertions, 1 deletion
diff --git a/sys/dev/pci/drm/amd/amdgpu/aldebaran.c b/sys/dev/pci/drm/amd/amdgpu/aldebaran.c
index 2b97b8a96fb..fa6193535d4 100644
--- a/sys/dev/pci/drm/amd/amdgpu/aldebaran.c
+++ b/sys/dev/pci/drm/amd/amdgpu/aldebaran.c
@@ -333,6 +333,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
{
struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_ras *con;
int r;
if (reset_device_list == NULL)
@@ -358,7 +359,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
*/
amdgpu_register_gpu_instance(tmp_adev);
- /* Resume RAS */
+ /* Resume RAS, ecc_irq */
+ con = amdgpu_ras_get_context(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev) && con) {
+ if (tmp_adev->sdma.ras &&
+ tmp_adev->sdma.ras->ras_block.ras_late_init) {
+ r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->sdma.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+
+ if (tmp_adev->gfx.ras &&
+ tmp_adev->gfx.ras->ras_block.ras_late_init) {
+ r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->gfx.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+ }
+
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */
diff --git a/sys/dev/pci/drm/amd/amdgpu/gmc_v10_0.c b/sys/dev/pci/drm/amd/amdgpu/gmc_v10_0.c
index c5aebb866b4..c05cb4cfd68 100644
--- a/sys/dev/pci/drm/amd/amdgpu/gmc_v10_0.c
+++ b/sys/dev/pci/drm/amd/amdgpu/gmc_v10_0.c
@@ -1141,6 +1141,10 @@ static int gmc_v10_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
return 0;
}
diff --git a/sys/dev/pci/drm/amd/amdgpu/gmc_v11_0.c b/sys/dev/pci/drm/amd/amdgpu/gmc_v11_0.c
index f4f3fa2c677..df78be203f6 100644
--- a/sys/dev/pci/drm/amd/amdgpu/gmc_v11_0.c
+++ b/sys/dev/pci/drm/amd/amdgpu/gmc_v11_0.c
@@ -974,6 +974,11 @@ static int gmc_v11_0_hw_fini(void *handle)
}
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
gmc_v11_0_gart_disable(adev);
return 0;
diff --git a/sys/dev/pci/drm/amd/amdgpu/gmc_v9_0.c b/sys/dev/pci/drm/amd/amdgpu/gmc_v9_0.c
index e116624ca93..0576e19bf50 100644
--- a/sys/dev/pci/drm/amd/amdgpu/gmc_v9_0.c
+++ b/sys/dev/pci/drm/amd/amdgpu/gmc_v9_0.c
@@ -2420,6 +2420,10 @@ static int gmc_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
return 0;
}