Re: [PATCH 2/2] PCI/portdrv: Place PCIe port hierarchy into D3cold at shutdown

From: Kai-Heng Feng
Date: Wed Dec 13 2023 - 22:48:22 EST


Hi Mario and Rafael,

On Thu, Dec 14, 2023 at 2:46 AM Rafael J. Wysocki <rafael@xxxxxxxxxx> wrote:
>
> On Wed, Dec 13, 2023 at 7:42 PM Mario Limonciello
> <mario.limonciello@xxxxxxx> wrote:
> >
> > On 12/13/2023 12:38, Rafael J. Wysocki wrote:
> > > On Wed, Dec 13, 2023 at 7:27 PM Mario Limonciello
> > > <mario.limonciello@xxxxxxx> wrote:
> > >>
> > >> When a system is being powered off it's important that PCIe ports
> > >> have been put into D3cold as there is no other software to turn
> > >> off the devices at S5.
> > >>
> > >> If PCIe ports are left in D0 then any GPIOs toggled by the ACPI
> > >> power resources may be left enabled and devices may consume excess
> > >> power.
> > >
> > > Isn't that a platform firmware issue?
> > >
> > > It is the responsibility of the platform firmware to properly put the
> > > platform into S5, including power removal from devices that are not
> > > armed for power-on.
> >
> > The specific issues that triggered this series were tied to the PCIe
> > ports for dGPUs. There is a GPIO that is toggled by _ON or _OFF.
> >
> > Windows calls _OFF as part of S5..
>
> I see.
>
> > >
> > >> Cc: mpearson-lenovo@xxxxxxxxx
> > >> Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
> > >> ---
> > >> drivers/pci/pcie/portdrv.c | 11 ++++++++---
> > >> 1 file changed, 8 insertions(+), 3 deletions(-)
> > >>
> > >> diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
> > >> index 14a4b89a3b83..08238680c481 100644
> > >> --- a/drivers/pci/pcie/portdrv.c
> > >> +++ b/drivers/pci/pcie/portdrv.c
> > >> @@ -734,9 +734,14 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
> > >> static void pcie_portdrv_shutdown(struct pci_dev *dev)
> > >> {
> > >> if (pci_bridge_d3_possible(dev)) {
> > >> - pm_runtime_forbid(&dev->dev);
> > >> - pm_runtime_get_noresume(&dev->dev);
> > >> - pm_runtime_dont_use_autosuspend(&dev->dev);
> > >> + /* whole hierarchy goes into a low power state for S5 */
> > >> + if (system_state == SYSTEM_POWER_OFF) {
> > >> + pci_set_power_state(dev, PCI_D3cold);
> > >> + } else {
> > >> + pm_runtime_forbid(&dev->dev);
> > >> + pm_runtime_get_noresume(&dev->dev);
> > >> + pm_runtime_dont_use_autosuspend(&dev->dev);
> > >> + }
> > >> }
> > >
> > > Wouldn't it be better to remove power from the port after running the
> > > code below?
> > >
> >
> > Yes; I think you're right. I'll do some more testing with this.
> >
> > >> pcie_port_device_remove(dev);
> > >> --
>
> IIRC, to do this all properly, you'd need to rework the shutdown path
> to look like the hibernation power-off one. Or even use the latter
> for shutdown?
>
> There was no reason to do that till now, so it has not been done, but
> it looks like you have one.
>

I am working on exactly same thing but with a different approach.
Because this is needed for more than just PCI devices.
I haven't written a proper commit message yet, but the implementation
is quite simple:

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index f007116a8427..b90c6cf6faf4 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -967,15 +967,17 @@ EXPORT_SYMBOL_GPL(acpi_pm_set_device_wakeup);
* @adev: ACPI device node corresponding to @dev.
* @system_state: System state to choose the device state for.
*/
-static int acpi_dev_pm_low_power(struct device *dev, struct acpi_device *adev,
- u32 system_state)
+static int acpi_dev_pm_low_power(struct acpi_device *adev, void* data)
{
int ret, state;
+ u32 *system_state = data;

if (!acpi_device_power_manageable(adev))
return 0;

- ret = acpi_dev_pm_get_state(dev, adev, system_state, NULL, &state);
+ acpi_dev_for_each_child(adev, acpi_dev_pm_low_power, data);
+
+ ret = acpi_dev_pm_get_state(&adev->dev, adev, *system_state, NULL, &state);
return ret ? ret : acpi_device_set_power(adev, state);
}

@@ -1016,7 +1018,7 @@ int acpi_dev_suspend(struct device *dev, bool wakeup)
wakeup = false;
}

- error = acpi_dev_pm_low_power(dev, adev, target_state);
+ error = acpi_dev_pm_low_power(adev, &target_state);
if (error && wakeup)
acpi_device_wakeup_disable(adev);

@@ -1386,6 +1388,7 @@ static struct dev_pm_domain acpi_general_pm_domain = {
static void acpi_dev_pm_detach(struct device *dev, bool power_off)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
+ u32 state = ACPI_STATE_S0;

if (adev && dev->pm_domain == &acpi_general_pm_domain) {
dev_pm_domain_set(dev, NULL);
@@ -1400,7 +1403,7 @@ static void acpi_dev_pm_detach(struct device
*dev, bool power_off)
dev_pm_qos_hide_latency_limit(dev);
dev_pm_qos_hide_flags(dev);
acpi_device_wakeup_disable(adev);
- acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
+ acpi_dev_pm_low_power(adev, &state);
}
}
}
@@ -1514,4 +1517,16 @@ bool acpi_dev_state_d0(struct device *dev)
}
EXPORT_SYMBOL_GPL(acpi_dev_state_d0);

+void acpi_dev_shutdown(struct device *dev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(dev);
+ u32 state = ACPI_STATE_S5;
+
+ if (!adev)
+ return;
+
+ acpi_device_wakeup_disable(adev);
+ acpi_dev_pm_low_power(adev, &state);
+}
+EXPORT_SYMBOL_GPL(acpi_dev_shutdown);
#endif /* CONFIG_PM */
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6ceaf50f5a67..7e7c99eade63 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -45,6 +45,15 @@ static void __fw_devlink_link_to_consumers(struct
device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;

+#ifdef CONFIG_ACPI
+static inline void fw_dev_shutdown(struct device *dev)
+{
+ acpi_dev_shutdown(dev);
+}
+#else
+static inline void fw_dev_shutdown(struct device *dev) { }
+#endif
+
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
* @con: Consumer end of the link.
@@ -4780,6 +4789,8 @@ void device_shutdown(void)
dev->driver->shutdown(dev);
}

+ fw_dev_shutdown(dev);
+
device_unlock(dev);
if (parent)
device_unlock(parent);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 641dc4843987..374f9eb75c22 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1130,6 +1130,7 @@ int acpi_subsys_runtime_resume(struct device *dev);
int acpi_dev_pm_attach(struct device *dev, bool power_on);
bool acpi_storage_d3(struct device *dev);
bool acpi_dev_state_d0(struct device *dev);
+void acpi_dev_shutdown(struct device *dev);
#else
static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
@@ -1145,6 +1146,7 @@ static inline bool acpi_dev_state_d0(struct device *dev)
{
return true;
}
+static inline void acpi_dev_shutdown(struct device *dev) { }
#endif

#if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP)