[PATCH v5 3/5] drm/amdgpu: Encapsulate all device reset info

From: André Almeida
Date: Thu Aug 17 2023 - 14:24:39 EST


To better organize struct amdgpu_device, keep all reset information
related fields together in a separated struct.

Signed-off-by: André Almeida <andrealmeid@xxxxxxxxxx>
---
v5: new patch, as requested by Shashank Sharma
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 34 +++++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 +++---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 +++++-----
3 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0d560b713948..56d78ca6e917 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -781,6 +781,26 @@ struct amdgpu_mqd {
#define AMDGPU_PRODUCT_NAME_LEN 64
struct amdgpu_reset_domain;

+#ifdef CONFIG_DEV_COREDUMP
+struct amdgpu_coredump_info {
+ struct amdgpu_device *adev;
+ struct amdgpu_task_info reset_task_info;
+ struct timespec64 reset_time;
+ bool reset_vram_lost;
+};
+#endif
+
+struct amdgpu_reset_info {
+ /* reset dump register */
+ u32 *reset_dump_reg_list;
+ u32 *reset_dump_reg_value;
+ int num_regs;
+
+#ifdef CONFIG_DEV_COREDUMP
+ struct amdgpu_coredump_info *coredump_info;
+#endif
+};
+
/*
* Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
*/
@@ -1084,10 +1104,7 @@ struct amdgpu_device {

struct mutex benchmark_mutex;

- /* reset dump register */
- uint32_t *reset_dump_reg_list;
- uint32_t *reset_dump_reg_value;
- int num_regs;
+ struct amdgpu_reset_info reset_info;

bool scpm_enabled;
uint32_t scpm_status;
@@ -1100,15 +1117,6 @@ struct amdgpu_device {
uint32_t aid_mask;
};

-#ifdef CONFIG_DEV_COREDUMP
-struct amdgpu_coredump_info {
- struct amdgpu_device *adev;
- struct amdgpu_task_info reset_task_info;
- struct timespec64 reset_time;
- bool reset_vram_lost;
-};
-#endif
-
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
return container_of(ddev, struct amdgpu_device, ddev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a4faea4fa0b5..3136a0774dd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2016,8 +2016,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
if (ret)
return ret;

- for (i = 0; i < adev->num_regs; i++) {
- sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]);
+ for (i = 0; i < adev->reset_info.num_regs; i++) {
+ sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]);
up_read(&adev->reset_domain->sem);
if (copy_to_user(buf + len, reg_offset, strlen(reg_offset)))
return -EFAULT;
@@ -2074,9 +2074,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
if (ret)
goto error_free;

- swap(adev->reset_dump_reg_list, tmp);
- swap(adev->reset_dump_reg_value, new);
- adev->num_regs = i;
+ swap(adev->reset_info.reset_dump_reg_list, tmp);
+ swap(adev->reset_info.reset_dump_reg_value, new);
+ adev->reset_info.num_regs = i;
up_write(&adev->reset_domain->sem);
ret = size;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b5b879bcc5c9..96975591841d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4790,10 +4790,10 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)

lockdep_assert_held(&adev->reset_domain->sem);

- for (i = 0; i < adev->num_regs; i++) {
- adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
- trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
- adev->reset_dump_reg_value[i]);
+ for (i = 0; i < adev->reset_info.num_regs; i++) {
+ adev->reset_info.reset_dump_reg_value[i] = RREG32(adev->reset_info.reset_dump_reg_list[i]);
+ trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
+ adev->reset_info.reset_dump_reg_value[i]);
}

return 0;
@@ -4831,13 +4831,13 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,

if (coredump->reset_vram_lost)
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
- if (coredump->adev->num_regs) {
+ if (coredump->adev->reset_info.num_regs) {
drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");

- for (i = 0; i < coredump->adev->num_regs; i++)
+ for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
drm_printf(&p, "0x%08x: 0x%08x\n",
- coredump->adev->reset_dump_reg_list[i],
- coredump->adev->reset_dump_reg_value[i]);
+ coredump->adev->reset_info.reset_dump_reg_list[i],
+ coredump->adev->reset_info.reset_dump_reg_value[i]);
}

return count - iter.remain;
--
2.41.0