Re: power-off delay/hang due to commit 6d25be57 (mainline)

From: Sebastian Andrzej Siewior
Date: Tue Jul 14 2020 - 10:11:41 EST


On 2020-07-14 15:54:57 [+0200], Rafael J. Wysocki wrote:
> On Tue, Jul 14, 2020 at 3:44 PM Sebastian Andrzej Siewior
> <bigeasy@xxxxxxxxxxxxx> wrote:>
> > On 2020-06-24 23:49:52 [+0200], Stephen Berman wrote:
> >
> > Let me summarize the thread here:
> >
> > On Stephen's system, ACPI informs the thermal zone driver to poll the
> > temperature every second and the driver does so.
> > The driver queries the temperature by invoking acpi_evaluate_integer()
> > which invokes (at some point) acpi_ev_queue_notify_request().
>
> Well, I don't quite see how acpi_ev_queue_notify_request() can be
> invoked from the acpi_evaluate_integer() code path.
>
> Do you have a call trace showing that?

So the trace in
https://lore.kernel.org/linux-acpi/87o8pjh1i0.fsf@xxxxxxx/

shows the pattern and we nailed it down that it comes from
thermal_get_temp(). I assumed acpi_ex_opcode_2A_0T_0R() since the other
candidate was acpi_ev_asynch_execute_gpe_method().

Stephen, the patch attached adds a WARN_ON() statement which will
produce a stack trace (4 or so). Could please run 'dmesg' after a while
and send it back. There should be a
"WARNING in drivers/acpi/acpica/evmisc.c"

statement or something along the lines.

Rafael, are you also interested in an ACPI dump?

> Cheers!

Sebastian
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 3e39907fedd9f..816e23d103c63 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -477,6 +477,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
* notifies on multiple device objects.
*/
notify = gpe_event_info->dispatch.notify_list;
+ trace_printk("notify %px\n", gpe_event_info);
while (ACPI_SUCCESS(status) && notify) {
status =
acpi_ev_queue_notify_request(notify->device_node,
@@ -484,6 +485,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)

notify = notify->next;
}
+ trace_printk("Event notify %px done %x\n", gpe_event_info, status);

break;

@@ -491,6 +493,9 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)

/* Allocate the evaluation information block */

+ trace_printk("Evaluate %4.4s\n",
+ acpi_ut_get_node_name(gpe_event_info->dispatch.method_node));
+
info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
if (!info) {
status = AE_NO_MEMORY;
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index ce1eda6beb845..d8e3f426729db 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -72,6 +72,7 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value)
union acpi_generic_state *info;
u8 handler_list_id = 0;
acpi_status status = AE_OK;
+ static int num_req = 0;

ACPI_FUNCTION_NAME(ev_queue_notify_request);

@@ -129,19 +130,20 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value)
info->notify.handler_list_head = handler_list_head;
info->notify.global = &acpi_gbl_global_notify[handler_list_id];

- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "Dispatching Notify on [%4.4s] (%s) Value 0x%2.2X (%s) Node %p\n",
+ trace_printk("Dispatching Notify on [%4.4s] (%s) Value 0x%2.2X (%s) Node %px\n",
acpi_ut_get_node_name(node),
acpi_ut_get_type_name(node->type), notify_value,
acpi_ut_get_notify_name(notify_value, ACPI_TYPE_ANY),
- node));
+ node);
+ WARN_ON(num_req > 10 && num_req < 15);

status = acpi_os_execute(OSL_NOTIFY_HANDLER,
acpi_ev_notify_dispatch, info);
if (ACPI_FAILURE(status)) {
acpi_ut_delete_generic_state(info);
}
-
+ if (num_req++ == 300)
+ tracing_off();
return (status);
}

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 762c5d50b8fe2..67d5ac9aa433d 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -844,6 +844,16 @@ static void acpi_os_execute_deferred(struct work_struct *work)
kfree(dpc);
}

+static void acpi_os_execute_deferred_notify(struct work_struct *work)
+{
+ struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work);
+
+ trace_printk("Start %px %pS(%px)\n", dpc, dpc->function, dpc->context);
+ dpc->function(dpc->context);
+ trace_printk("End %px %pS(%px)\n", dpc, dpc->function, dpc->context);
+ kfree(dpc);
+}
+
#ifdef CONFIG_ACPI_DEBUGGER
static struct acpi_debugger acpi_debugger;
static bool acpi_debugger_initialized;
@@ -1096,7 +1106,8 @@ acpi_status acpi_os_execute(acpi_execute_type type,
*/
if (type == OSL_NOTIFY_HANDLER) {
queue = kacpi_notify_wq;
- INIT_WORK(&dpc->work, acpi_os_execute_deferred);
+ trace_printk("Adding %pS %px <%px>\n", function, context, dpc);
+ INIT_WORK(&dpc->work, acpi_os_execute_deferred_notify);
} else if (type == OSL_GPE_HANDLER) {
queue = kacpid_wq;
INIT_WORK(&dpc->work, acpi_os_execute_deferred);
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 19067a5e52934..883eb37592639 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -191,7 +191,9 @@ static int acpi_thermal_get_temperature(struct acpi_thermal *tz)

tz->last_temperature = tz->temperature;

+ printk("Query temp\n");
status = acpi_evaluate_integer(tz->device->handle, "_TMP", NULL, &tmp);
+ printk("Query temp done: %d\n", status);
if (ACPI_FAILURE(status))
return -ENODEV;