Re: [patch] af_unix fix for a panic a DoS and a memory leak [Re:

Andrea Arcangeli (andrea@e-mind.com)
Tue, 2 Mar 1999 23:42:27 +0100 (CET)


On Tue, 2 Mar 1999, Andrea Arcangeli wrote:

> unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
>+int unix_nr_socks = 0;
>+spinlock_t unix_nr_lock = SPIN_LOCK_UNLOCKED;

The spinlock was allowing the af_unix code to run parallel with bh
handlers, but thinking 1 second more about the issue, since we only need
to decrease an intereger memory location it doesn't worth to cli the local
cpu just to run parallel with bh handlers for a so short time ;).

Note it's also valid replace the start_bh_atomic() with
disable_bh(TIMER_BH), but disable_bh(TIMER_BH) should (in 2.2.2 doesn't)
use a spinlock to avoid all races, so it's not a good idea to run it to
avoid a spinlock in first place ;). start_bh_atomic() instead will avoid
all local cli.

I removed the spinlock and I rediffed against 2.2.2:

Index: include/net/sock.h
===================================================================
RCS file: /var/cvs/linux/include/net/sock.h,v
retrieving revision 1.1.1.3
retrieving revision 1.1.2.6
diff -u -r1.1.1.3 -r1.1.2.6
--- sock.h 1999/02/20 15:42:53 1.1.1.3
+++ linux/include/net/sock.h 1999/03/01 13:11:34 1.1.2.6
@@ -99,8 +99,7 @@
struct semaphore readsem;
struct sock * other;
struct sock ** list;
- int marksweep;
-#define MARKED 1
+ struct sock * gc_next;
int inflight;
};

Index: net/unix//Makefile
===================================================================
RCS file: /var/cvs/linux/net/unix/Makefile,v
retrieving revision 1.1.1.1
retrieving revision 1.1.2.1
diff -u -r1.1.1.1 -r1.1.2.1
Index: net/unix//af_unix.c
===================================================================
RCS file: /var/cvs/linux/net/unix/af_unix.c,v
retrieving revision 1.1.1.2
retrieving revision 1.1.2.6
diff -u -r1.1.1.2 -r1.1.2.6
--- af_unix.c 1999/01/18 13:41:22 1.1.1.2
+++ linux/net/unix/af_unix.c 1999/03/02 22:34:45 1.1.2.6
@@ -33,6 +33,10 @@
* Lots of bug fixes.
* Alexey Kuznetosv : Repaired (I hope) bugs introduces
* by above two patches.
+ * Andrea Arcangeli : Check max backlog of the listen sock
+ * at connect time and security fix
+ * that limits the max number of socks
+ * to 2*max_files.
*
* Known differences from reference BSD that was tested:
*
@@ -102,6 +106,7 @@
int sysctl_unix_destroy_delay = 10*HZ;

unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
+int unix_nr_socks = 0;

#define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE])

@@ -263,6 +268,13 @@
unix_socket *sk=(unix_socket *)data;
if(!unix_locked(sk) && atomic_read(&sk->wmem_alloc) == 0)
{
+ /*
+ * Don't need any lock because all other places that touch
+ * unix_nr_socks run from a bh atomic context and
+ * bh handers are atomic in respect to themself. -arca
+ */
+ --unix_nr_socks;
+
sk_free(sk);

/* socket destroyed, decrement count */
@@ -347,6 +359,10 @@

if(!unix_locked(sk) && atomic_read(&sk->wmem_alloc) == 0)
{
+ start_bh_atomic();
+ --unix_nr_socks;
+ end_bh_atomic();
+
sk_free(sk);

/* socket destroyed, decrement count */
@@ -371,6 +387,8 @@
return -EOPNOTSUPP; /* Only stream sockets accept */
if (!sk->protinfo.af_unix.addr)
return -EINVAL; /* No listens on an unbound socket */
+ if ((unsigned) backlog > SOMAXCONN)
+ backlog = SOMAXCONN;
sk->max_ack_backlog=backlog;
sk->state=TCP_LISTEN;
sock->flags |= SO_ACCEPTCON;
@@ -388,10 +406,22 @@
{
struct sock *sk;

+ start_bh_atomic();
+ if (unix_nr_socks++ > 2*max_files)
+ {
+ end_bh_atomic();
+ goto too_many;
+ }
+ end_bh_atomic();
+
MOD_INC_USE_COUNT;
sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);
if (!sk) {
MOD_DEC_USE_COUNT;
+ too_many:
+ start_bh_atomic();
+ --unix_nr_socks;
+ end_bh_atomic();
return NULL;
}

@@ -707,6 +737,10 @@
if (other == NULL || other->dead || other->state != TCP_LISTEN)
goto out;

+ /* Fail if the listen backlog is just full. -arca */
+ if (other->ack_backlog >= other->max_ack_backlog)
+ goto out;
+
err = -ENOMEM;
if (newsk == NULL || skb == NULL)
goto out;
@@ -817,9 +851,7 @@
tsk = skb->sk;
sk->ack_backlog--;
kfree_skb(skb);
- if (!tsk->dead)
- break;
- unix_release_sock(tsk);
+ break;
}


Index: net/unix//garbage.c
===================================================================
RCS file: /var/cvs/linux/net/unix/garbage.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.2.3
diff -u -r1.1.1.1 -r1.1.2.3
--- garbage.c 1999/01/18 01:27:46 1.1.1.1
+++ linux/net/unix/garbage.c 1999/03/01 13:11:33 1.1.2.3
@@ -17,11 +17,12 @@
*
* - explicit stack instead of recursion
* - tail recurse on first born instead of immediate push/pop
+ * - we gather the stuff that should not be killed into list
+ * and stack is just a path from root to the current pointer.
*
* Future optimizations:
*
* - don't just push entire root set; process in place
- * - use linked list for internal stack
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -51,6 +52,14 @@
* That might be the reason of random oopses on close_fp() in
* unrelated processes.
*
+ * AV 28 Feb 1999
+ * Kill the explicit allocation of stack. Now we keep the marked
+ * socks in the list with root in gc_current to one of the nodes.
+ * Stack is represented as path from gc_current to the last elem.
+ * Unmark now means "add to list". Push == "make it a son of
+ * gc_current". Pop == "move gc_current to parent". We keep only
+ * pointers to parents (->gc_next).
+ *
*/

#include <linux/kernel.h>
@@ -65,7 +74,6 @@
#include <linux/netdevice.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>

#include <net/sock.h>
#include <net/tcp.h>
@@ -74,9 +82,8 @@

/* Internal data structures and random procedures: */

-static unix_socket **stack; /* stack of objects to mark */
-static int in_stack = 0; /* first free entry in stack */
-static int max_stack; /* Top of stack */
+#define GC_HEAD ((unix_socket *) -1)
+static unix_socket *gc_current=GC_HEAD; /* stack of objects to mark */

extern inline unix_socket *unix_get_socket(struct file *filp)
{
@@ -122,32 +129,25 @@
/*
* Garbage Collector Support Functions
*/
-
-extern inline void push_stack(unix_socket *x)
-{
- if (in_stack == max_stack)
- panic("can't push onto full stack");
- stack[in_stack++] = x;
-}

extern inline unix_socket *pop_stack(void)
{
- if (in_stack == 0)
- panic("can't pop empty gc stack");
- return stack[--in_stack];
+ unix_socket *p=gc_current;
+ gc_current = p->protinfo.af_unix.gc_next;
+ return p;
}

extern inline int empty_stack(void)
{
- return in_stack == 0;
+ return gc_current == GC_HEAD;
}

extern inline void maybe_unmark_and_push(unix_socket *x)
{
- if (!(x->protinfo.af_unix.marksweep&MARKED))
+ if (x->protinfo.af_unix.gc_next)
return;
- x->protinfo.af_unix.marksweep&=~MARKED;
- push_stack(x);
+ x->protinfo.af_unix.gc_next = gc_current;
+ gc_current = x;
}


@@ -169,23 +169,9 @@
return;
in_unix_gc=1;

- if(stack==NULL || max_files>max_stack)
- {
- if(stack)
- vfree(stack);
- stack=(unix_socket **)vmalloc(max_files*sizeof(struct unix_socket *));
- if(stack==NULL)
- {
- printk(KERN_NOTICE "unix_gc: deferred due to low memory.\n");
- in_unix_gc=0;
- return;
- }
- max_stack=max_files;
- }
-
forall_unix_sockets(i, s)
{
- s->protinfo.af_unix.marksweep|=MARKED;
+ s->protinfo.af_unix.gc_next=NULL;
}
/*
* Everything is now marked
@@ -276,9 +262,9 @@

if (f)
{
- if ((f->protinfo.af_unix.marksweep&MARKED))
+ if (!f->protinfo.af_unix.gc_next)
{
- f->protinfo.af_unix.marksweep&=~MARKED;
+ f->protinfo.af_unix.gc_next=GC_HEAD;
x=f;
f=NULL;
goto tail;
@@ -290,7 +276,7 @@

forall_unix_sockets(i, s)
{
- if (s->protinfo.af_unix.marksweep&MARKED)
+ if (!s->protinfo.af_unix.gc_next)
{
struct sk_buff *nextsk;
skb=skb_peek(&s->receive_queue);
Index: net/unix//sysctl_net_unix.c
===================================================================
RCS file: /var/cvs/linux/net/unix/sysctl_net_unix.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.2.1
diff -u -r1.1.1.1 -r1.1.2.1

Andrea Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/