Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

From: Guenter Roeck
Date: Thu Nov 30 2023 - 22:06:26 EST


On 11/27/23 05:16, Zhang Rui wrote:
Currently, coretemp driver only supports 128 cores per package.
This loses some core temperation information on systems that have more
than 128 cores per package.
[ 58.685033] coretemp coretemp.0: Adding Core 128 failed
[ 58.692009] coretemp coretemp.0: Adding Core 129 failed

Fix the problem by using a per package list to maintain the per core
temp_data instead of the fixed length pdata->core_data[] array.

Signed-off-by: Zhang Rui <rui.zhang@xxxxxxxxx>
---
drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 58 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index cef43fedbd58..1bb1a6e4b07b 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -39,11 +39,7 @@ static int force_tjmax;
module_param_named(tjmax, force_tjmax, int, 0444);
MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
-#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
-#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
-#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
enum coretemp_attr_index {
ATTR_LABEL,
@@ -90,17 +86,17 @@ struct temp_data {
struct attribute *attrs[TOTAL_ATTRS + 1];
struct attribute_group attr_group;
struct mutex update_lock;
+ struct list_head node;
};
/* Platform Data per Physical CPU */
struct platform_data {
struct device *hwmon_dev;
u16 pkg_id;
- u16 cpu_map[NUM_REAL_CORES];
- struct ida ida;
struct cpumask cpumask;
- struct temp_data *core_data[MAX_CORE_DATA];
struct device_attribute name_attr;
+ struct mutex core_data_lock;
+ struct list_head core_data_list;
};
struct tjmax_pci {
@@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
return tdata;
}
+static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
+{
+ struct temp_data *tdata;
+
+ mutex_lock(&pdata->core_data_lock);
+ list_for_each_entry(tdata, &pdata->core_data_list, node) {
+ if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
+ goto found;
+ if (cpu < 0 && tdata->is_pkg_data)
+ goto found;
+ }

I really don't like this. With 128+ cores, it gets terribly expensive.

How about calculating the number of cores in the probe function and
allocating cpu_map[] and core_data[] instead ?

Thanks,
Guenter