[PATCH] [WORKAROUND] CONFIG_PREEMPT_RT and ib_umad_close() issue

From: John Blackwood
Date: Mon Sep 17 2007 - 11:23:19 EST


When using OFED-1.2.5 based InfiniBand kernel modules on 2.6.22 based
kernels with Ingo Molnar's CONFIG_PREEMPT_RT patch applied, commands such as ibnetdiscover, smpquery, sminfo, etc. will hang. The problem is with the downgrade_write() rw semaphore usage in the ib_umad_close() routine: on these kernels the call ends up in rt_downgrade_write(), which triggers a kernel BUG (see the console output below).

This patch is a temporary workaround that avoids this
issue by changing the ib_umad_port mutex from a rw_semaphore to a
compat_rw_semaphore.

This is admittedly only a temporary solution.

An example of the BUG console message output and the workaround
patch are shown below.


bowser> ------------[ cut here ]------------
kernel BUG at kernel/rt.c:352!
invalid opcode: 0000 [#1]
PREEMPT SMP
last sysfs file: /class/infiniband_mad/umad0/port
Modules linked in: rdma_ucm(F) rds(F) ib_ucm(F) ib_srp(F) ib_sdp(F) rdma_cm(F) iw_cm(F) ib_addr(F) ib_ipoib(F) ib_cm(F) ib_sa(F) ib_uverbs(F) ib_umad(F) ib_mthca(F) ib_mad(F) ib_core(F)
CPU: 1
EIP: 0060:[<c014d160>] Tainted: GF N VLI
EFLAGS: 00210282 (2.6.22.6-rt_shield_trace #1)
EIP is at rt_downgrade_write+0x0/0x10
eax: f705df7c ebx: 00000008 ecx: f7171014 edx: f705df9c
esi: f7170ffc edi: f7171000 ebp: f7171004 esp: f5fcdef0
ds: 007b es: 007b fs: 00d8 gs: 0000 ss: 0068 preempt:00000001
Process ibnetdiscover (pid: 10842, ti=f5fcc000 task=f6cd00f0 task.ti=f5fcc000)
Stack: f883a4a8 f705df40 00000000 00000008 f6ca1680 f63adf20 f734e7dc c018c240
00000000 00000000 f734e7dc c2d442c0 f63adf20 f6ca1680 f71d06c0 00000000
00000001 c018a2ec 00000000 00000001 00000003 f44af440 c012bd20 f71d06c0
Call Trace:
[<f883a4a8>] ib_umad_close+0x98/0xf0 [ib_umad]
[<c018c240>] __fput+0x170/0x1a0
[<c018a2ec>] filp_close+0x3c/0x80
[<c012bd20>] close_files+0x50/0x60
[<c012bd98>] put_files_struct+0x28/0x80
[<c012c996>] do_exit+0x1c6/0x570
[<c018b1ca>] sys_write+0x6a/0xf0
[<c012cd96>] do_group_exit+0x26/0x70
[<c01054ff>] sysenter_past_esp+0x68/0x99
=======================
---------------------------
| preempt count: 00000001 ]
| 1-level deep critical section nesting:
----------------------------------------
.. [<c06653b9>] .... __spin_lock_irqsave+0x19/0x50
.....[<00000000>] .. ( <= 0x0)

Code: 5b e9 45 7a 51 00 90 8d 74 26 00 8b 53 1c 85 d2 74 e3 89 d8 89 ca e8 c0 84 51 00 ff 4b 1c 5b c3 8d 74 26 00 8d bc 27 00 00 00 00 <0f> 0b eb fe 8d b6 00 00 00 00 8d bf 00 00 00 00 e8 db 79 51 00
EIP: [<c014d160>] rt_downgrade_write+0x0/0x10 SS:ESP 0068:f5fcdef0
BUG: sleeping function called from invalid context ibnetdiscover(10842) at kernel/rtmutex.c:636
in_atomic():1 [00000001], irqs_disabled():1
[<c0126661>] __might_sleep+0xe1/0x100
[<c036850b>] set_palette+0x2b/0x60
[<c0664f66>] __rt_spin_lock+0x36/0x50
[<c01241be>] __wake_up+0x1e/0x70
[<c012a44b>] wake_up_klogd+0x3b/0x40
[<c0106f76>] die+0x166/0x240
[<c0665c31>] do_trap+0x1b1/0x260
[<c01388a7>] raw_notifier_call_chain+0x17/0x20
[<c0144040>] notify_die+0x30/0x40
[<c01071e0>] do_invalid_op+0x0/0x90
[<c0107263>] do_invalid_op+0x83/0x90
[<c014d160>] rt_downgrade_write+0x0/0x10
[<c01575c2>] add_preempt_count+0x12/0xe0
[<c01575c2>] add_preempt_count+0x12/0xe0
[<c0664f66>] __rt_spin_lock+0x36/0x50
[<c030e345>] lock_list_del_init+0x55/0x80
[<c018c66d>] file_kill+0x18d/0x1a0
[<c06658c2>] error_code+0x72/0x80
[<c015007b>] load_module+0x33b/0xe10
[<c014d160>] rt_downgrade_write+0x0/0x10
[<f883a4a8>] ib_umad_close+0x98/0xf0 [ib_umad]
[<c018c240>] __fput+0x170/0x1a0
[<c018a2ec>] filp_close+0x3c/0x80
[<c012bd20>] close_files+0x50/0x60
[<c012bd98>] put_files_struct+0x28/0x80
[<c012c996>] do_exit+0x1c6/0x570
[<c018b1ca>] sys_write+0x6a/0xf0
[<c012cd96>] do_group_exit+0x26/0x70
[<c01054ff>] sysenter_past_esp+0x68/0x99
=======================
---------------------------
| preempt count: 00000001 ]
| 1-level deep critical section nesting:
----------------------------------------
.. [<c06653b9>] .... __spin_lock_irqsave+0x19/0x50
.....[<00000000>] .. ( <= 0x0)

Fixing recursive fault but reboot is needed!




--- linux-2.6.22/drivers/infiniband/core/user_mad.c 2007-09-17 09:48:45.000000000 -0400
+++ new/drivers/infiniband/core/user_mad.c 2007-09-17 09:50:41.000000000 -0400
@@ -93,7 +93,7 @@ struct ib_umad_port {
struct class_device *sm_class_dev;
struct semaphore sm_sem;

- struct rw_semaphore mutex;
+ struct compat_rw_semaphore mutex;
struct list_head file_list;

struct ib_device *ib_dev;
@@ -159,7 +159,7 @@ static int queue_packet(struct ib_umad_f
{
int ret = 1;

- down_read(&file->port->mutex);
+ compat_down_read(&file->port->mutex);

for (packet->mad.hdr.id = 0;
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
@@ -173,7 +173,7 @@ static int queue_packet(struct ib_umad_f
break;
}

- up_read(&file->port->mutex);
+ compat_up_read(&file->port->mutex);

return ret;
}
@@ -461,7 +461,7 @@ static ssize_t ib_umad_write(struct file
goto err;
}

- down_read(&file->port->mutex);
+ compat_down_read(&file->port->mutex);

agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
@@ -558,7 +558,7 @@ static ssize_t ib_umad_write(struct file
if (ret)
goto err_send;

- up_read(&file->port->mutex);
+ compat_up_read(&file->port->mutex);
return count;

err_send:
@@ -568,7 +568,7 @@ err_msg:
err_ah:
ib_destroy_ah(ah);
err_up:
- up_read(&file->port->mutex);
+ compat_up_read(&file->port->mutex);
err:
kfree(packet);
return ret;
@@ -597,7 +597,7 @@ static int ib_umad_reg_agent(struct ib_u
int agent_id;
int ret;

- down_write(&file->port->mutex);
+ compat_down_write(&file->port->mutex);

if (!file->port->ib_dev) {
ret = -EPIPE;
@@ -650,7 +650,7 @@ found:
ret = 0;

out:
- up_write(&file->port->mutex);
+ compat_up_write(&file->port->mutex);
return ret;
}

@@ -663,7 +663,7 @@ static int ib_umad_unreg_agent(struct ib
if (get_user(id, (u32 __user *) arg))
return -EFAULT;

- down_write(&file->port->mutex);
+ compat_down_write(&file->port->mutex);

if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
@@ -674,7 +674,7 @@ static int ib_umad_unreg_agent(struct ib
file->agent[id] = NULL;

out:
- up_write(&file->port->mutex);
+ compat_up_write(&file->port->mutex);

if (agent)
ib_unregister_mad_agent(agent);
@@ -710,7 +710,7 @@ static int ib_umad_open(struct inode *in
if (!port)
return -ENXIO;

- down_write(&port->mutex);
+ compat_down_write(&port->mutex);

if (!port->ib_dev) {
ret = -ENXIO;
@@ -736,7 +736,7 @@ static int ib_umad_open(struct inode *in
list_add_tail(&file->port_list, &port->file_list);

out:
- up_write(&port->mutex);
+ compat_up_write(&port->mutex);
return ret;
}

@@ -748,7 +748,7 @@ static int ib_umad_close(struct inode *i
int already_dead;
int i;

- down_write(&file->port->mutex);
+ compat_down_write(&file->port->mutex);

already_dead = file->agents_dead;
file->agents_dead = 1;
@@ -761,14 +761,14 @@ static int ib_umad_close(struct inode *i

list_del(&file->port_list);

- downgrade_write(&file->port->mutex);
+ compat_downgrade_write(&file->port->mutex);

if (!already_dead)
for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
if (file->agent[i])
ib_unregister_mad_agent(file->agent[i]);

- up_read(&file->port->mutex);
+ compat_up_read(&file->port->mutex);

kfree(file);
kref_put(&dev->ref, ib_umad_release_dev);
@@ -839,10 +839,10 @@ static int ib_umad_sm_close(struct inode
};
int ret = 0;

- down_write(&port->mutex);
+ compat_down_write(&port->mutex);
if (port->ib_dev)
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- up_write(&port->mutex);
+ compat_up_write(&port->mutex);

up(&port->sm_sem);

@@ -906,7 +906,7 @@ static int ib_umad_init_port(struct ib_d
port->ib_dev = device;
port->port_num = port_num;
init_MUTEX(&port->sm_sem);
- init_rwsem(&port->mutex);
+ compat_init_rwsem(&port->mutex);
INIT_LIST_HEAD(&port->file_list);

port->dev = cdev_alloc();
@@ -992,7 +992,7 @@ static void ib_umad_kill_port(struct ib_
umad_port[port->dev_num] = NULL;
spin_unlock(&port_lock);

- down_write(&port->mutex);
+ compat_down_write(&port->mutex);

port->ib_dev = NULL;

@@ -1017,17 +1017,17 @@ static void ib_umad_kill_port(struct ib_
file->agents_dead = 1;
list_del_init(&file->port_list);

- downgrade_write(&port->mutex);
+ compat_downgrade_write(&port->mutex);

for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
if (file->agent[id])
ib_unregister_mad_agent(file->agent[id]);

- up_read(&port->mutex);
- down_write(&port->mutex);
+ compat_up_read(&port->mutex);
+ compat_down_write(&port->mutex);
}

- up_write(&port->mutex);
+ compat_up_write(&port->mutex);

clear_bit(port->dev_num, dev_map);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/