Re: ACPI hotplug panic with current git head

From: Kenji Kaneshige
Date: Sun Jan 18 2009 - 22:11:54 EST


James Bottomley wrote:
> On Mon, 2009-01-19 at 10:10 +0900, Kenji Kaneshige wrote:
>> James Bottomley wrote:
>>> On Fri, 2009-01-16 at 15:07 +0900, Kenji Kaneshige wrote:
>>>>> It looks like acpi_pci_get_bridge_handle() is returning NULL, so this is
>>>>> the fix that works for me.
>>>>>
>>>> I'm sorry for troubling you, and thank you for your patience.
>>>>
>>>> The patch seems to avoid the kernel panic, but I still don't know
>>>> why acpi_pci_get_bridge_handle() returns NULL here. I assumed
>>>> it should return non-NULL value here. So I'd like to investigate
>>>> it more.
>>> Sure, Len and I couldn't work out why it was returning NULL on this box
>>> (other than that perhaps it doesn't have an ACPI entry). The two
>>> offending busses which trigger this are the two internal ones (which
>>> aren't hotplug). The layout of the box is:
>>>
>>> sparkweed:~# lspci -t
>>> -+-[0000:0c]---00.0
>>> +-[0000:0a]---00.0
>>> +-[0000:08]---00.0
>>> +-[0000:06]---00.0
>>> +-[0000:04]---00.0
>>> +-[0000:02]---00.0
>>> +-[0000:01]-+-00.0
>>> | +-01.0
>>> | +-01.1
>>> | \-02.0
>>> \-[0000:00]-+-00.0
>>> +-01.0
>>> +-03.0
>>> +-03.1
>>> +-03.2
>>> +-0f.0
>>> +-0f.1
>>> \-0f.3
>>> sparkweed:~# lspci
>>> 00:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 00:01.0 VGA compatible controller: ATI Technologies Inc Radeon RV100 QY
>>> [Radeon 7000/VE]
>>> 00:03.0 USB Controller: NEC Corporation USB (rev 43)
>>> 00:03.1 USB Controller: NEC Corporation USB (rev 43)
>>> 00:03.2 USB Controller: NEC Corporation USB 2.0 (rev 04)
>>> 00:0f.0 Host bridge: Broadcom CSB6 South Bridge (rev a0)
>>> 00:0f.1 IDE interface: Broadcom CSB6 RAID/IDE Controller (rev a0)
>>> 00:0f.3 ISA bridge: Broadcom GCLE-2 Host Bridge
>>> 01:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 01:01.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5704
>>> Gigabit Ethernet (rev 10)
>>> 01:01.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5704
>>> Gigabit Ethernet (rev 10)
>>> 01:02.0 SCSI storage controller: Adaptec AIC-9410W SAS (Razor ASIC
>>> non-RAID) (rev 08)
>>> 02:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 04:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 06:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 08:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 0a:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>> 0c:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 02)
>>>
>>> And when I annotate the problem, the two busses returning NULL are
>>> 0000:00 and 0000:01
>>>
>> Thank you very much for the information. It seems there are
>> something special in the data structure of host bridge for
>> 0000:00 and 0000:01.
>
> Yes, len speculates the non hotplug buses are missing some acpi entries.
>
>> I'm making a debug patch now and will send it to you as soon
>> as possible. I'm sorry to trouble you, but could you try it
>> later.
>
> Sure ... I'm travelling this week, but the machine is usually remotely
> accessible.
>

I appreciate your kindness.

I'm sending the debug patch against 2.6.29-rc1 below. I'm also
sending it as an attachment. It also contains the code to prevent
the kernel panic from you. Please note that you will see two
WARN_ON(1) messages with the patch. Those are shown by my debug
patch.

Could you try it and send me the whole dmsg output?

Thanks,
Kenji Kaneshige

drivers/pci/hotplug/acpiphp_glue.c | 18 ++++++++++++++++++
include/linux/pci-acpi.h | 25 ++++++++++++++++++++++---
2 files changed, 40 insertions(+), 3 deletions(-)

Index: linux-2.6.29-rc1/include/linux/pci-acpi.h
===================================================================
--- linux-2.6.29-rc1.orig/include/linux/pci-acpi.h 2008-12-01 22:59:52.000000000 +0900
+++ linux-2.6.29-rc1/include/linux/pci-acpi.h 2009-01-19 10:27:13.000000000 +0900
@@ -65,9 +65,28 @@
{
int seg = pci_domain_nr(pbus), busnr = pbus->number;
struct pci_dev *bridge = pbus->self;
- if (bridge)
- return DEVICE_ACPI_HANDLE(&(bridge->dev));
- return acpi_get_pci_rootbridge_handle(seg, busnr);
+ acpi_handle handle;
+ char objname[64] = "<NULL>";
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };
+
+ printk(KERN_INFO "%s: bridge(pbus->self) = %p, pbus->parent = %p\n",
+ __func__, bridge, pbus->parent);
+
+ if (bridge) {
+ printk(KERN_INFO "%s: handle the bridge as PtoP.\n", __func__);
+ handle = DEVICE_ACPI_HANDLE(&(bridge->dev));
+ } else {
+ printk(KERN_INFO "%s: handle the bridge as root.\n", __func__);
+ handle = acpi_get_pci_rootbridge_handle(seg, busnr);
+ }
+
+ if (handle)
+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: ACPI handle of the bridge for %04x:%02x: %p %s\n",
+ __func__, seg, busnr, handle, objname);
+
+ return handle;
}
#else
#if !defined(AE_ERROR)
Index: linux-2.6.29-rc1/drivers/pci/hotplug/acpiphp_glue.c
===================================================================
--- linux-2.6.29-rc1.orig/drivers/pci/hotplug/acpiphp_glue.c 2008-12-01 22:59:46.000000000 +0900
+++ linux-2.6.29-rc1/drivers/pci/hotplug/acpiphp_glue.c 2009-01-19 10:32:05.000000000 +0900
@@ -266,6 +266,12 @@
int found = acpi_pci_detect_ejectable(pbus);
if (!found) {
acpi_handle bridge_handle = acpi_pci_get_bridge_handle(pbus);
+ if (!bridge_handle) {
+ printk(KERN_INFO
+ "%s: NULL handle returned!!!\n", __func__);
+ WARN_ON(1);
+ return 0;
+ }
acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1,
is_pci_dock_device, (void *)&found, NULL);
}
@@ -459,6 +465,9 @@
int device, function;
struct pci_dev *dev;
struct pci_bus *pci_bus = context;
+ char objname[64];
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };

status = acpi_get_handle(handle, "_ADR", &dummy_handle);
if (ACPI_FAILURE(status))
@@ -478,6 +487,9 @@
if (!dev || !dev->subordinate)
goto out;

+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: Detecting slots under PtoP bridge (%s)\n",
+ __func__, objname);
/* check if this bridge has ejectable slots */
if ((detect_ejectable_slots(dev->subordinate) > 0)) {
dbg("found PCI-to-PCI bridge at PCI %s\n", pci_name(dev));
@@ -504,6 +516,9 @@
int seg, bus;
acpi_handle dummy_handle;
struct pci_bus *pci_bus;
+ char objname[64];
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };

/* if the bridge doesn't have _STA, we assume it is always there */
status = acpi_get_handle(handle, "_STA", &dummy_handle);
@@ -539,6 +554,9 @@
return 0;
}

+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: Detecting slots under root bridge (%s)\n",
+ __func__, objname);
/* check if this bridge has ejectable slots */
if (detect_ejectable_slots(pci_bus) > 0) {
dbg("found PCI host-bus bridge with hot-pluggable slots\n");

drivers/pci/hotplug/acpiphp_glue.c | 18 ++++++++++++++++++
include/linux/pci-acpi.h | 25 ++++++++++++++++++++++---
2 files changed, 40 insertions(+), 3 deletions(-)

Index: linux-2.6.29-rc1/include/linux/pci-acpi.h
===================================================================
--- linux-2.6.29-rc1.orig/include/linux/pci-acpi.h 2008-12-01 22:59:52.000000000 +0900
+++ linux-2.6.29-rc1/include/linux/pci-acpi.h 2009-01-19 10:27:13.000000000 +0900
@@ -65,9 +65,28 @@
{
int seg = pci_domain_nr(pbus), busnr = pbus->number;
struct pci_dev *bridge = pbus->self;
- if (bridge)
- return DEVICE_ACPI_HANDLE(&(bridge->dev));
- return acpi_get_pci_rootbridge_handle(seg, busnr);
+ acpi_handle handle;
+ char objname[64] = "<NULL>";
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };
+
+ printk(KERN_INFO "%s: bridge(pbus->self) = %p, pbus->parent = %p\n",
+ __func__, bridge, pbus->parent);
+
+ if (bridge) {
+ printk(KERN_INFO "%s: handle the bridge as PtoP.\n", __func__);
+ handle = DEVICE_ACPI_HANDLE(&(bridge->dev));
+ } else {
+ printk(KERN_INFO "%s: handle the bridge as root.\n", __func__);
+ handle = acpi_get_pci_rootbridge_handle(seg, busnr);
+ }
+
+ if (handle)
+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: ACPI handle of the bridge for %04x:%02x: %p %s\n",
+ __func__, seg, busnr, handle, objname);
+
+ return handle;
}
#else
#if !defined(AE_ERROR)
Index: linux-2.6.29-rc1/drivers/pci/hotplug/acpiphp_glue.c
===================================================================
--- linux-2.6.29-rc1.orig/drivers/pci/hotplug/acpiphp_glue.c 2008-12-01 22:59:46.000000000 +0900
+++ linux-2.6.29-rc1/drivers/pci/hotplug/acpiphp_glue.c 2009-01-19 10:32:05.000000000 +0900
@@ -266,6 +266,12 @@
int found = acpi_pci_detect_ejectable(pbus);
if (!found) {
acpi_handle bridge_handle = acpi_pci_get_bridge_handle(pbus);
+ if (!bridge_handle) {
+ printk(KERN_INFO
+ "%s: NULL handle returned!!!\n", __func__);
+ WARN_ON(1);
+ return 0;
+ }
acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1,
is_pci_dock_device, (void *)&found, NULL);
}
@@ -459,6 +465,9 @@
int device, function;
struct pci_dev *dev;
struct pci_bus *pci_bus = context;
+ char objname[64];
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };

status = acpi_get_handle(handle, "_ADR", &dummy_handle);
if (ACPI_FAILURE(status))
@@ -478,6 +487,9 @@
if (!dev || !dev->subordinate)
goto out;

+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: Detecting slots under PtoP bridge (%s)\n",
+ __func__, objname);
/* check if this bridge has ejectable slots */
if ((detect_ejectable_slots(dev->subordinate) > 0)) {
dbg("found PCI-to-PCI bridge at PCI %s\n", pci_name(dev));
@@ -504,6 +516,9 @@
int seg, bus;
acpi_handle dummy_handle;
struct pci_bus *pci_bus;
+ char objname[64];
+ struct acpi_buffer buffer = { .length = sizeof(objname),
+ .pointer = objname };

/* if the bridge doesn't have _STA, we assume it is always there */
status = acpi_get_handle(handle, "_STA", &dummy_handle);
@@ -539,6 +554,9 @@
return 0;
}

+ acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+ printk(KERN_INFO "%s: Detecting slots under root bridge (%s)\n",
+ __func__, objname);
/* check if this bridge has ejectable slots */
if (detect_ejectable_slots(pci_bus) > 0) {
dbg("found PCI host-bus bridge with hot-pluggable slots\n");