Re: [PATCH 1/1] platform/x86/intel/pmc: Improve PKGC residency counters debug

From: Ilpo Järvinen
Date: Thu Mar 07 2024 - 08:06:46 EST


On Thu, 7 Mar 2024, Kane Chen wrote:

> The current code only prints PKGC-10 residency when the PKGC-10
> is not reached in previous 'freeze' attempt. To debug PKGC-10 issues, we
> also need to know other PKGC residency counters to better triage issues.
> Ex:
> 1. When system is stuck in PC2, it can be caused short LTR from device.
> 2. When system is stuck in PC8, it can be caused by display engine.
>
> To better triage issues, all PKGC residency are needed when issues happens.

happen.

> Example log:
> CPU did not enter Package C10!!! (Package C10 cnt=0x0)
> Prev Package C2 cnt = 0x2191a325de, Current Package C2 cnt = 0x21aba30724
> Prev Package C3 cnt = 0x0, Current Package C3 cnt = 0x0
> Prev Package C6 cnt = 0x0, Current Package C6 cnt = 0x0
> Prev Package C7 cnt = 0x0, Current Package C7 cnt = 0x0
> Prev Package C8 cnt = 0x0, Current Package C8 cnt = 0x0
> Prev Package C9 cnt = 0x0, Current Package C9 cnt = 0x0
> Prev Package C10 cnt = 0x0, Current Package C10 cnt = 0x0
>
> With this log, we can know whether it's a stuck PC2 issue, and we can
> check whether the short LTR from device causes the issue.
>
> Signed-off-by: Kane Chen <kane.chen@xxxxxxxxx>
> ---
> drivers/platform/x86/intel/pmc/core.c | 47 ++++++++++++++++++++-------
> drivers/platform/x86/intel/pmc/core.h | 7 ++--
> 2 files changed, 41 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
> index 8f9c036809c79..b8910b671667e 100644
> --- a/drivers/platform/x86/intel/pmc/core.c
> +++ b/drivers/platform/x86/intel/pmc/core.c
> @@ -1389,6 +1389,15 @@ static int pmc_core_probe(struct platform_device *pdev)
> return -ENOMEM;
> pmcdev->pmcs[PMC_IDX_MAIN] = primary_pmc;
>
> + /* The last element in msr_map is empty */
> + pmcdev->num_of_pkgc = ARRAY_SIZE(msr_map) - 1;
> + pmcdev->pkgc_res_cnt = devm_kcalloc(&pdev->dev,
> + pmcdev->num_of_pkgc,
> + sizeof(*pmcdev->pkgc_res_cnt),
> + GFP_KERNEL);
> + if (!pmcdev->pkgc_res_cnt)
> + return -ENOMEM;
> +
> /*
> * Coffee Lake has CPU ID of Kaby Lake and Cannon Lake PCH. So here
> * Sunrisepoint PCH regmap can't be used. Use Cannon Lake PCH regmap
> @@ -1432,6 +1441,7 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
> {
> struct pmc_dev *pmcdev = dev_get_drvdata(dev);
> struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN];
> + int i;

unsigned int.

> if (pmcdev->suspend)
> pmcdev->suspend(pmcdev);
> @@ -1440,9 +1450,11 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
> if (pm_suspend_via_firmware())
> return 0;
>
> - /* Save PC10 residency for checking later */
> - if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pmcdev->pc10_counter))
> - return -EIO;
> + /* Save PKGC residency for checking later */
> + for (i = 0; i < pmcdev->num_of_pkgc; i++) {
> + if (rdmsrl_safe(msr_map[i].bit_mask, &pmcdev->pkgc_res_cnt[i]))
> + return -EIO;
> + }
>
> /* Save S0ix residency for checking later */
> if (pmc_core_dev_state_get(pmc, &pmcdev->s0ix_counter))
> @@ -1451,14 +1463,15 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
> return 0;
> }
>
> -static inline bool pmc_core_is_pc10_failed(struct pmc_dev *pmcdev)
> +static inline bool pmc_core_is_deepest_pkgc_failed(struct pmc_dev *pmcdev)
> {
> - u64 pc10_counter;
> + u32 deepest_pkgc_msr = msr_map[pmcdev->num_of_pkgc - 1].bit_mask;
> + u64 deepest_pkgc_residency;
>
> - if (rdmsrl_safe(MSR_PKG_C10_RESIDENCY, &pc10_counter))
> + if (rdmsrl_safe(deepest_pkgc_msr, &deepest_pkgc_residency))
> return false;
>
> - if (pc10_counter == pmcdev->pc10_counter)
> + if (deepest_pkgc_residency == pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1])
> return true;
>
> return false;
> @@ -1497,10 +1510,22 @@ int pmc_core_resume_common(struct pmc_dev *pmcdev)
> if (!warn_on_s0ix_failures)
> return 0;
>
> - if (pmc_core_is_pc10_failed(pmcdev)) {
> - /* S0ix failed because of PC10 entry failure */
> - dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n",
> - pmcdev->pc10_counter);
> + if (pmc_core_is_deepest_pkgc_failed(pmcdev)) {
> + /* S0ix failed because of deepest PKGC entry failure */
> + dev_info(dev, "CPU did not enter %s!!! (%s cnt=0x%llx)\n",
> + msr_map[pmcdev->num_of_pkgc - 1].name,
> + msr_map[pmcdev->num_of_pkgc - 1].name,
> + pmcdev->pkgc_res_cnt[pmcdev->num_of_pkgc - 1]);
> +
> + for (i = 0; i < pmcdev->num_of_pkgc; i++) {
> + u64 pc_cnt;
> +
> + if (!rdmsrl_safe(msr_map[i].bit_mask, &pc_cnt)) {
> + dev_info(dev, "Prev %s cnt = 0x%llx, Current %s cnt = 0x%llx\n",
> + msr_map[i].name, pmcdev->pkgc_res_cnt[i],
> + msr_map[i].name, pc_cnt);
> + }
> + }
> return 0;
> }
>
> diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
> index 54137faaae2b2..fd7ae76984ac7 100644
> --- a/drivers/platform/x86/intel/pmc/core.h
> +++ b/drivers/platform/x86/intel/pmc/core.h
> @@ -385,7 +385,8 @@ struct pmc {
> * @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers
> * used to read MPHY PG and PLL status are available
> * @mutex_lock: mutex to complete one transcation
> - * @pc10_counter: PC10 residency counter
> + * @pkgc_res_cnt: PKGC residency counter

Array of PKGC residency counters


With those addressed, feel free to add:

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx>


--
i.