OCR'ed and edited a bit; there might be mistakes.
Picture: https://dump.sha256.net/dump/unhibernating_panic.jpg
unhibernating @ block 50329599 length 243MB
uvm_fault(0xffffffff826b2860, 0x38, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at ttm_resource_manager_evict_all+0x5e: cmpq %rbx, 0x38(%r14)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
* 0 0 0 0x100000 0x20 0K swapper
ttm_resource_manager_evict_all(ffff80000017f260,0,dba63e95861e671,ffff800000170000,ffff800000170058,2)
at ttm_resource_manager_evict_all+0x5e
amdgpu_device_prepare(ffff800000170058, ffff800000170058, fac0345246af9871,
ffff800000170058,0,2) at amdgpu_device_prepare+0x61
amdgpu_activate(ffff800000170000, 2, b6a78044d3a303c5,0, ffff80000014400,
ffffffff8228acc8) at amdgpu_activate+0x55
config_activate_children(ffff800000144c00,2,172aac03cc1e75dd,0,ffff80000014a000,2)
at config_activate_children+0x85
config_activate_children(ffff80000014a000,2,172aac03cc1e75dd,0,ffff800000144100,2)
at config_activate_children+0x85
config_activate_children(ffff800000144100,2,172aac03cc1e75dd,0,
ffff800000030280,2) at config_activate_children+0x85
config_activate_children(ffff800000030280,2,172aac03cc1e7256,2,ffff800000030280,0)
config_suspend_all(2,2,72519cb31f5203, ffffffff82a94a38,0,bfff50) at
config_suspend_all+0x1ae
hibernate_resume(8c03129a1118d1c,ffffffff82a9460,ffff800000142200,0,0,0) at
hibernate_resume+0x1b4
diskconf(25bada1afa9d6262,8, ffffffff82538360, ffffffff82a8008,400056f4b50,8)
at diskconf+0x188
main(0,0,1001000, ffff800037c871f0,ffffffff81fda030,ffffffff82a94f40) at
main+0x510
I've bisected it to this changeset:
https://codeberg.org/OpenBSD/src/commit/36668b1581688d40ad5fd6631f4f503e6d36091d
Suspend / resume seems to be unaffected by this; reverting the changeset
makes hibernate / unhibernate work again.
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu.h
sys/dev/pci/drm/amd/amdgpu/amdgpu.h
index 38a424f16fb..afac024456e 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu.h
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu.h
@@ -1398,7 +1398,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
void amdgpu_driver_release_kms(struct drm_device *dev);
int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
-int amdgpu_device_prepare(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
index 2d96609911e..7901aeb4dfd 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
@@ -1568,7 +1568,6 @@ static void amdgpu_switcheroo_set_state(struct pci_dev
*pdev,
} else {
pr_info("switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -4206,43 +4205,6 @@ static int amdgpu_device_evict_resources(struct
amdgpu_device *adev)
/*
* Suspend & resume.
*/
-/**
- * amdgpu_device_prepare - prepare for device suspend
- *
- * @dev: drm dev pointer
- *
- * Prepare to put the hw in the suspend state (all asics).
- * Returns 0 for success or an error on failure.
- * Called at driver suspend.
- */
-int amdgpu_device_prepare(struct drm_device *dev)
-{
- struct amdgpu_device *adev = drm_to_adev(dev);
- int i, r;
-
- if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
- return 0;
-
- /* Evict the majority of BOs before starting suspend sequence */
- r = amdgpu_device_evict_resources(adev);
- if (r)
- return r;
-
- flush_delayed_work(&adev->gfx.gfx_off_delay_work);
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
- continue;
- r = adev->ip_blocks[i].version->funcs->prepare_suspend((void
*)adev);
- if (r)
- return r;
- }
-
- return 0;
-}
-
/**
* amdgpu_device_suspend - initiate device suspend
*
@@ -4268,6 +4230,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool
fbcon)
adev->in_suspend = true;
+ /* Evict the majority of BOs before grabbing the full access */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c
sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c
index 328f10f9a0d..3c0df8a235e 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c
@@ -2393,9 +2393,8 @@ static int amdgpu_pmops_prepare(struct device *dev)
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
- if (amdgpu_device_supports_boco(drm_dev) &&
- pm_runtime_suspended(dev))
- return 1;
+ if (amdgpu_device_supports_boco(drm_dev))
+ return pm_runtime_suspended(dev);
/* if we will not support s3 or s2i for the device
* then skip suspend
@@ -2404,7 +2403,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
!amdgpu_acpi_is_s3_active(adev))
return 1;
- return amdgpu_device_prepare(drm_dev);
+ return 0;
}
static void amdgpu_pmops_complete(struct device *dev)
@@ -2606,9 +2605,6 @@ static int amdgpu_pmops_runtime_suspend(struct device
*dev)
if (amdgpu_device_supports_boco(drm_dev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- ret = amdgpu_device_prepare(drm_dev);
- if (ret)
- return ret;
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
@@ -3671,7 +3667,6 @@ amdgpu_activate(struct device *self, int act)
switch (act) {
case DVACT_QUIESCE:
rv = config_activate_children(self, act);
- amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
break;
case DVACT_SUSPEND:
--
In my defence, I have been left unsupervised.