Re: [maybe fixed.. i hope i hope i hope] Re: snipe hunt

From: Manfred Spraul (manfreds@colorfullife.com)
Date: Fri May 05 2000 - 13:10:24 EST


I've written a patch. It boots, and "ps xua" doesn't crash immediately

* task_lock is a spinlock
* it protects ->mm, ->files and ->fs
* fs/proc/{base,array}.c updated

I've noticed another sync problem:
        task->flags is used without synchronization.
        atomic_t?

TODO:
* check kernel/exec_domain: put_fs_struct
* check proc functions on zombies.
* check fork(): any problems?

* ->sig: spinlocks in kernel/signal.c
* ->flags: atomic_t

--
	Manfred

// $Header$ // Kernel Version: // VERSION = 2 // PATCHLEVEL = 3 // SUBLEVEL = 99 // EXTRAVERSION = -pre6 --- 2.3/kernel/ptrace.c Thu Apr 27 11:27:26 2000 +++ build-2.3/kernel/ptrace.c Fri May 5 18:43:02 2000 @@ -121,10 +121,13 @@ struct vm_area_struct * vma; /* Worry about races with exit() */ - lock_kernel(); + task_lock(tsk); mm = tsk->mm; - atomic_inc(&mm->mm_users); - unlock_kernel(); + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(tsk); + if (!mm) + return 0; down(&mm->mmap_sem); vma = find_extend_vma(mm, addr); --- 2.3/arch/i386/kernel/ptrace.c Fri Jan 21 13:08:25 2000 +++ build-2.3/arch/i386/kernel/ptrace.c Fri May 5 18:56:38 2000 @@ -134,7 +134,6 @@ { struct task_struct *child; struct user * dummy = NULL; - unsigned long flags; int i, ret; lock_kernel(); @@ -151,15 +150,19 @@ ret = -ESRCH; read_lock(&tasklist_lock); child = find_task_by_pid(pid); - read_unlock(&tasklist_lock); /* FIXME!!! */ + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); if (!child) goto out; + ret = -EPERM; if (pid == 1) /* you may not mess with init */ - goto out; + goto out_tsk; + if (request == PTRACE_ATTACH) { if (child == current) - goto out; + goto out_tsk; if ((!child->dumpable || (current->uid != child->euid) || (current->uid != child->suid) || @@ -168,34 +171,33 @@ (current->gid != child->sgid) || (!cap_issubset(child->cap_permitted, current->cap_permitted)) || (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) - goto out; + goto out_tsk; /* the same process cannot be attached many times */ if (child->flags & PF_PTRACED) - goto out; + goto out_tsk; child->flags |= PF_PTRACED; - write_lock_irqsave(&tasklist_lock, flags); + write_lock_irq(&tasklist_lock); if (child->p_pptr != current) { REMOVE_LINKS(child); child->p_pptr = current; SET_LINKS(child); } - write_unlock_irqrestore(&tasklist_lock, flags); + write_unlock_irq(&tasklist_lock); send_sig(SIGSTOP, child, 1); ret = 0; - goto out; + goto out_tsk; } ret = -ESRCH; if (!(child->flags & PF_PTRACED)) - goto out; + goto out_tsk; if (child->state != TASK_STOPPED) { if (request != PTRACE_KILL) - goto out; + goto out_tsk; } if (child->p_pptr != current) - goto out; - + goto out_tsk; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -270,7 +272,7 @@ data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out; + goto out_tsk; } addr -= (long) &dummy->u_debugreg; @@ -347,11 +349,11 @@ break; child->flags &= ~(PF_PTRACED|PF_TRACESYS); child->exit_code = data; - write_lock_irqsave(&tasklist_lock, flags); + write_lock_irq(&tasklist_lock); REMOVE_LINKS(child); child->p_pptr = child->p_opptr; SET_LINKS(child); - write_unlock_irqrestore(&tasklist_lock, flags); + write_unlock_irq(&tasklist_lock); /* make sure the single step bit is not set. */ tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; put_stack_long(child, EFL_OFFSET, tmp); @@ -435,6 +437,8 @@ ret = -EIO; break; } +out_tsk: + free_task_struct(child); out: unlock_kernel(); return ret; --- 2.3/include/linux/sched.h Thu Apr 27 11:27:25 2000 +++ build-2.3/include/linux/sched.h Fri May 5 18:10:34 2000 @@ -346,8 +346,8 @@ /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; -/* Protection of fields allocatio/deallocation */ - struct semaphore exit_sem; +/* Protection of (de-)allocation: mm, files, fs */ + spinlock_t alloc_lock; }; /* @@ -418,7 +418,7 @@ blocked: {{0}}, \ sigqueue: NULL, \ sigqueue_tail: &tsk.sigqueue, \ - exit_sem: __MUTEX_INITIALIZER(tsk.exit_sem) \ + alloc_lock: SPIN_LOCK_UNLOCKED \ } @@ -835,19 +835,14 @@ write_unlock_irq(&tasklist_lock); } -static inline int task_lock(struct task_struct *p) +static inline void task_lock(struct task_struct *p) { - down(&p->exit_sem); - if (p->p_pptr) - return 1; - /* He's dead, Jim. You take his wallet, I'll take the tricorder... */ - up(&p->exit_sem); - return 0; + spin_lock(&p->alloc_lock); } static inline void task_unlock(struct task_struct *p) { - up(&p->exit_sem); + spin_unlock(&p->alloc_lock); } #endif /* __KERNEL__ */ --- 2.3/kernel/fork.c Thu Apr 27 11:27:26 2000 +++ build-2.3/kernel/fork.c Fri May 5 18:11:39 2000 @@ -680,7 +680,7 @@ p->p_cptr = NULL; init_waitqueue_head(&p->wait_chldexit); p->vfork_sem = NULL; - sema_init(&p->exit_sem, 1); + spin_lock_init(&p->alloc_lock); p->sigpending = 0; sigemptyset(&p->signal); --- 2.3/fs/proc/array.c Thu Apr 27 11:27:14 2000 +++ build-2.3/fs/proc/array.c Fri May 5 19:54:57 2000 @@ -148,20 +148,25 @@ { int g; + read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" - "FDSize:\t%d\n" - "Groups:\t", + "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), p->pid, p->p_opptr->pid, p->p_pptr->pid != p->p_opptr->pid ? p->p_opptr->pid : 0, p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); + task_lock(p); + buffer += sprintf(buffer, + "FDSize:\t%d\n" + "Groups:\t", p->files ? p->files->max_fds : 0); + task_unlock(p); for (g = 0; g < p->ngroups; g++) buffer += sprintf(buffer, "%d ", p->groups[g]); @@ -264,20 +269,25 @@ } -/* task is locked, so we are safe here */ - int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; - struct mm_struct *mm = task->mm; + struct mm_struct *mm; #if defined(CONFIG_ARCH_S390) int line,len; #endif buffer = task_name(task, buffer); buffer = task_state(task, buffer); - if (mm) + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (mm) { buffer = task_mem(mm, buffer); + mmput(mm); + } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); #if defined(CONFIG_ARCH_S390) @@ -287,20 +297,24 @@ return buffer - orig; } -/* task is locked, so we are safe here */ - int proc_pid_stat(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; unsigned long vsize, eip, esp, wchan; long priority, nice; int tty_pgrp; sigset_t sigign, sigcatch; char state; int res; + pid_t ppid; + struct mm_struct *mm; state = *get_task_state(task); vsize = eip = esp = 0; + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { struct vm_area_struct *vma; down(&mm->mmap_sem); @@ -330,13 +344,16 @@ nice = task->priority; nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY; + read_lock(&tasklist_lock); + ppid = task->p_opptr->pid; + read_unlock(&tasklist_lock); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n", task->pid, task->comm, state, - task->p_opptr->pid, + ppid, task->pgrp, task->session, task->tty ? kdev_t_to_nr(task->tty->device) : 0, @@ -376,6 +393,8 @@ task->cnswap, task->exit_signal, task->processor); + if(mm) + mmput(mm); return res; } @@ -455,9 +474,14 @@ int proc_pid_statm(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0; + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { struct vm_area_struct * vma; down(&mm->mmap_sem); @@ -482,6 +506,7 @@ vma = vma->vm_next; } up(&mm->mmap_sem); + mmput(mm); } return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, share, trs, lrs, drs, dt); @@ -523,7 +548,7 @@ ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * buf, size_t count, loff_t *ppos) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; struct vm_area_struct * map, * next; char * destptr = buf, * buffer; loff_t lineno; @@ -539,7 +564,14 @@ if (!buffer) goto out; - if (!mm || count == 0) + if (count == 0) + goto getlen_out; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (!mm) goto getlen_out; /* Check whether the mmaps could change if we sleep */ @@ -637,6 +669,7 @@ /* encode f_pos */ *ppos = (lineno << MAPS_LINE_SHIFT) + column; + mmput(mm); getlen_out: retval = destptr - buf; --- 2.3/fs/proc/base.c Thu Apr 27 11:27:14 2000 +++ build-2.3/fs/proc/base.c Fri May 5 19:55:52 2000 @@ -59,9 +59,11 @@ int result = -ENOENT; struct task_struct *task = inode->u.proc_i.task; - if (!task_lock(task)) - return result; + task_lock(task); mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (!mm) goto out; down(&mm->mmap_sem); @@ -77,67 +79,80 @@ vma = vma->vm_next; } up(&mm->mmap_sem); + mmput(mm); out: - task_unlock(task); return result; } static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { + struct fs_struct *fs; int result = -ENOENT; - if (task_lock(inode->u.proc_i.task)) { - struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) { - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); - result = 0; - } - task_unlock(inode->u.proc_i.task); + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + result = 0; + put_fs_struct(fs); } return result; } static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { + struct fs_struct *fs; int result = -ENOENT; - if (task_lock(inode->u.proc_i.task)) { - struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) { - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); - result = 0; - } - task_unlock(inode->u.proc_i.task); + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + result = 0; } return result; } -/* task is locked and can't drop mm, so we are safe */ - static int proc_pid_environ(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { int len = mm->env_end - mm->env_start; if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); + mmput(mm); } return res; } -/* task is locked and can't drop mm, so we are safe */ - static int proc_pid_cmdline(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { int len = mm->arg_end - mm->arg_start; if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->arg_start, buffer, len, 0); + mmput(mm); } return res; } @@ -216,10 +231,7 @@ struct task_struct *task = inode->u.proc_i.task; ssize_t res; - if (!task_lock(task)) - return -EIO; res = proc_pid_read_maps(task, file, buf, count, ppos); - task_unlock(task); return res; } @@ -243,14 +255,19 @@ if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; - if (!task_lock(task)) { + /* FIXME: check that all proc_read function + * handle a dying task gracefully. + * The memory for the task structure + * won't be freed, we've called get_task_struct(). + */ +#if 0 + if (!task->p_pptr) { free_page(page); return -EIO; } +#endif length = inode->u.proc_i.op.proc_read(task, (char*)page); - - task_unlock(task); if (length < 0) { free_page(page);

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun May 07 2000 - 21:00:18 EST