Re: NFS locking problem

Andrea Arcangeli (andrea@e-mind.com)
Sun, 3 Jan 1999 16:07:17 +0100 (CET)


On Sun, 3 Jan 1999, Anton Blanchard wrote:

>
> I have a process stuck at sleep_on(&block.b_wait) within nlmclnt_block. It
> is running vger 2.1.130 on a Sparc 4m/690 with knfsd-981204 (patches and
> knfsd server).

Could you try this patch? I also implemented down() using macros, the same way sleep_on is implemented.

Index: linux/kernel/sched.c
diff -u linux/kernel/sched.c:1.1.1.4 linux/kernel/sched.c:1.1.1.1.2.36
--- linux/kernel/sched.c:1.1.1.4 Fri Jan 1 18:22:30 1999
+++ linux/kernel/sched.c Sun Jan 3 16:23:28 1999
@@ -854,56 +855,64 @@
* Either form may be used in conjunction with "up()".
*
*/
-static inline int __do_down(struct semaphore * sem, int task_state)
-{
- struct task_struct *tsk = current;
- struct wait_queue wait = { tsk, NULL };
- int ret = 0;

- tsk->state = task_state;
- add_wait_queue(&sem->wait, &wait);
+#define DOWN_VAR \
+ struct task_struct *tsk = current; \
+ struct wait_queue wait = { tsk, NULL };

- /*
- * Ok, we're set up. sem->count is known to be less than zero
- * so we must wait.
- *
- * We can let go the lock for purposes of waiting.
- * We re-acquire it after awaking so as to protect
- * all semaphore operations.
- *
- * If "up()" is called before we call waking_non_zero() then
- * we will catch it right away. If it is called later then
- * we will have to go through a wakeup cycle to catch it.
- *
- * Multiple waiters contend for the semaphore lock to see
- * who gets to gate through and who has to wait some more.
- */
- for (;;) {
- if (waking_non_zero(sem)) /* are we waking up? */
+#define DOWN_HEAD(task_state) \
+ \
+ \
+ tsk->state = (task_state); \
+ add_wait_queue(&sem->wait, &wait); \
+ \
+ /* \
+ * Ok, we're set up. sem->count is known to be less than zero \
+ * so we must wait. \
+ * \
+ * We can let go the lock for purposes of waiting. \
+ * We re-acquire it after awaking so as to protect \
+ * all semaphore operations. \
+ * \
+ * If "up()" is called before we call waking_non_zero() then \
+ * we will catch it right away. If it is called later then \
+ * we will have to go through a wakeup cycle to catch it. \
+ * \
+ * Multiple waiters contend for the semaphore lock to see \
+ * who gets to gate through and who has to wait some more. \
+ */ \
+ for (;;) { \
+ if (waking_non_zero(sem)) /* are we waking up? */ \
break; /* yes, exit loop */

- if (task_state == TASK_INTERRUPTIBLE && signal_pending(tsk)) {
- ret = -EINTR; /* interrupted */
- atomic_inc(&sem->count); /* give up on down operation */
- break;
- }
-
- schedule();
- tsk->state = task_state;
- }
- tsk->state = TASK_RUNNING;
+#define DOWN_TAIL(task_state) \
+ tsk->state = (task_state); \
+ } \
+ tsk->state = TASK_RUNNING; \
remove_wait_queue(&sem->wait, &wait);
- return ret;
-}

void __down(struct semaphore * sem)
{
- __do_down(sem,TASK_UNINTERRUPTIBLE);
+ DOWN_VAR
+ DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+ schedule();
+ DOWN_TAIL(TASK_UNINTERRUPTIBLE)
}

int __down_interruptible(struct semaphore * sem)
{
- return __do_down(sem,TASK_INTERRUPTIBLE);
+ DOWN_VAR
+ int ret = 0;
+ DOWN_HEAD(TASK_INTERRUPTIBLE)
+ if (signal_pending(tsk))
+ {
+ ret = -EINTR; /* interrupted */
+ atomic_inc(&sem->count); /* give up on down operation */
+ break;
+ }
+ schedule();
+ DOWN_TAIL(TASK_INTERRUPTIBLE)
+ return ret;
}

#define SLEEP_ON_VAR \
@@ -956,6 +965,19 @@
SLEEP_ON_TAIL
}

+long sleep_on_timeout(struct wait_queue **p, long timeout)
+{
+ SLEEP_ON_VAR
+
+ current->state = TASK_UNINTERRUPTIBLE;
+
+ SLEEP_ON_HEAD
+ timeout = schedule_timeout(timeout);
+ SLEEP_ON_TAIL
+
+ return timeout;
+}
+
void scheduling_functions_end_here(void) { }

static inline void cascade_timers(struct timer_vec *tv)
Index: linux/fs/lockd/clntlock.c
diff -u linux/fs/lockd/clntlock.c:1.1.1.1 linux/fs/lockd/clntlock.c:1.1.1.1.2.1
--- linux/fs/lockd/clntlock.c:1.1.1.1 Fri Nov 20 00:01:10 1998
+++ linux/fs/lockd/clntlock.c Sun Jan 3 16:23:39 1999
@@ -71,12 +71,7 @@
* a 1 minute timeout would do. See the comment before
* nlmclnt_lock for an explanation.
*/
- /*
- * FIXME, can we be not interruptible and so be allowed to use
- * a timeout here? -arca
- */
-/* current->timeout = jiffies + 30 * HZ; */
- sleep_on(&block.b_wait);
+ sleep_on_timeout(&block.b_wait, 30*HZ);

for (head = &nlm_blocked; *head; head = &(*head)->b_next) {
if (*head == &block) {
--- /tmp/sched.h Sun Jan 3 16:32:58 1999
+++ linux/include/linux/sched.h Sun Jan 3 16:25:33 1999
@@ -460,2 +465,4 @@
extern void FASTCALL(sleep_on(struct wait_queue ** p));
+extern long FASTCALL(sleep_on_timeout(struct wait_queue ** p,
+ signed long timeout));
extern void FASTCALL(interruptible_sleep_on(struct wait_queue ** p));

Andrea Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/