[PATCH] PAG support, try #2

From: David Howells (dhowells@redhat.com)
Date: Wed May 14 2003 - 05:43:31 EST


Hi Linus,

Here's a revised patch for adding PAG support that incorporates suggestions
and corrections I've been sent.

David

diff -uNr linux-2.5.69/arch/i386/kernel/entry.S linux-2.5.69-pag/arch/i386/kernel/entry.S
--- linux-2.5.69/arch/i386/kernel/entry.S 2003-05-06 15:06:47.000000000 +0100
+++ linux-2.5.69-pag/arch/i386/kernel/entry.S 2003-05-14 10:36:24.000000000 +0100
@@ -852,6 +852,7 @@
          .long sys_clock_gettime /* 265 */
          .long sys_clock_getres
          .long sys_clock_nanosleep
-
+ .long sys_setpag
+ .long sys_getpag
  
 nr_syscalls=(.-sys_call_table)/4
diff -uNr linux-2.5.69/fs/file_table.c linux-2.5.69-pag/fs/file_table.c
--- linux-2.5.69/fs/file_table.c 2003-05-06 15:04:45.000000000 +0100
+++ linux-2.5.69-pag/fs/file_table.c 2003-05-14 09:08:19.000000000 +0100
@@ -166,6 +166,7 @@
         if (file->f_op && file->f_op->release)
                 file->f_op->release(inode, file);
         security_file_free(file);
+ vfs_token_put(file->f_token);
         fops_put(file->f_op);
         if (file->f_mode & FMODE_WRITE)
                 put_write_access(inode);
diff -uNr linux-2.5.69/fs/proc/array.c linux-2.5.69-pag/fs/proc/array.c
--- linux-2.5.69/fs/proc/array.c 2003-05-06 15:07:08.000000000 +0100
+++ linux-2.5.69-pag/fs/proc/array.c 2003-05-13 10:58:56.000000000 +0100
@@ -154,13 +154,14 @@
         read_lock(&tasklist_lock);
         buffer += sprintf(buffer,
                 "State:\t%s\n"
+ "Pag:\t%d\n"
                 "Tgid:\t%d\n"
                 "Pid:\t%d\n"
                 "PPid:\t%d\n"
                 "TracerPid:\t%d\n"
                 "Uid:\t%d\t%d\t%d\t%d\n"
                 "Gid:\t%d\t%d\t%d\t%d\n",
- get_task_state(p), p->tgid,
+ get_task_state(p), p->vfspag ? p->vfspag->pag : 0, p->tgid,
                 p->pid, p->pid ? p->real_parent->pid : 0,
                 p->pid && p->ptrace ? p->parent->pid : 0,
                 p->uid, p->euid, p->suid, p->fsuid,
diff -uNr linux-2.5.69/include/asm-i386/posix_types.h linux-2.5.69-pag/include/asm-i386/posix_types.h
--- linux-2.5.69/include/asm-i386/posix_types.h 2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-pag/include/asm-i386/posix_types.h 2003-05-12 10:19:15.000000000 +0100
@@ -13,6 +13,7 @@
 typedef unsigned short __kernel_nlink_t;
 typedef long __kernel_off_t;
 typedef int __kernel_pid_t;
+typedef int __kernel_pag_t;
 typedef unsigned short __kernel_ipc_pid_t;
 typedef unsigned short __kernel_uid_t;
 typedef unsigned short __kernel_gid_t;
diff -uNr linux-2.5.69/include/asm-i386/unistd.h linux-2.5.69-pag/include/asm-i386/unistd.h
--- linux-2.5.69/include/asm-i386/unistd.h 2003-05-06 15:04:37.000000000 +0100
+++ linux-2.5.69-pag/include/asm-i386/unistd.h 2003-05-13 10:47:59.000000000 +0100
@@ -273,8 +273,10 @@
 #define __NR_clock_gettime (__NR_timer_create+6)
 #define __NR_clock_getres (__NR_timer_create+7)
 #define __NR_clock_nanosleep (__NR_timer_create+8)
+#define __NR_setpag 268
+#define __NR_getpag 269
 
-#define NR_syscalls 268
+#define NR_syscalls 270
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
diff -uNr linux-2.5.69/include/linux/cred.h linux-2.5.69-pag/include/linux/cred.h
--- linux-2.5.69/include/linux/cred.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-pag/include/linux/cred.h 2003-05-14 10:57:01.000000000 +0100
@@ -0,0 +1,87 @@
+#ifndef _LINUX_CRED_H
+#define _LINUX_CRED_H
+
+#ifdef __KERNEL__
+
+#include <linux/param.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <asm/atomic.h>
+
+/*
+ * VFS session authentication token cache
+ *
+ * This is used to store the data required for extra levels of filesystem
+ * security (such as AFS/NFS kerberos keys, Samba workgroup/user/pass, or NTFS
+ * ACLs).
+ *
+ * VFS authentication tokens contain a single blob of data, consisting of three
+ * parts, all next to each other:
+ * (1) An FS name
+ * (2) A key
+ * (3) An arbitrary chunk of data
+ *
+ * Token blobs must not be changed once passed to the core kernel for
+ * management
+ */
+struct vfs_pag { /* one Process Authentication Group and its tokens */
+ struct rb_node node; /* link in the tree of all PAGs, keyed on pag */
+ atomic_t usage; /* reference count; see vfs_pag_get()/vfs_pag_put() */
+ pag_t pag; /* Process Authentication Group ID */
+ struct list_head tokens; /* authentication tokens */
+ rwlock_t lock; /* guards the tokens list */
+};
+
+struct vfs_token { /* one authentication token held by a PAG */
+ atomic_t usage; /* reference count; see vfs_token_get()/vfs_token_put() */
+ struct list_head link; /* link in pag's list (empty once withdrawn) */
+ unsigned short k_off; /* offset of key in blob */
+ unsigned short d_off; /* offset of data in blob */
+ size_t size; /* size of blob */
+ void *blob; /* blob containing FS name + key + data */
+};
+
+extern pag_t vfs_join_pag(pag_t pag); /* join existing PAG; ID or -ve error */
+extern pag_t vfs_leave_pag(void); /* leave the current PAG; returns 0 */
+extern pag_t vfs_new_pag(void); /* create + join a new PAG; ID or -ENOMEM */
+extern long sys_setpag(pag_t); /* syscall: >0 join, 0 leave, -1 new */
+extern long sys_getpag(void); /* syscall: current PAG ID, or 0 if none */
+extern void vfs_unpag(const char *fsname); /* drop current PAG's tokens for fsname */
+
+extern void vfs_pag_put(struct vfs_pag *); /* drop a PAG ref; NULL is ignored */
+
+static inline struct vfs_pag *vfs_pag_get(struct vfs_pag *vfspag)
+{
+ atomic_inc(&vfspag->usage); /* take a reference; vfspag must not be NULL */
+ return vfspag; /* handed back so the call can sit inside an expression */
+}
+
+static inline int is_vfs_token_valid(struct vfs_token *vtoken)
+{
+ return !list_empty(&vtoken->link); /* non-zero while still on a PAG's list */
+}
+
+extern int vfs_pag_add_token(const char *fsname, /* NUL-terminated FS name */
+ unsigned short klen, /* length of key in bytes */
+ const void *key,
+ size_t dlen, /* length of data in bytes */
+ const void *data,
+ struct vfs_token **_token); /* new token, or NULL on error */
+
+extern struct vfs_token *vfs_pag_find_token(const char *fsname,
+ unsigned short klen,
+ const void *key); /* returns a referenced token, or NULL */
+
+extern void vfs_pag_withdraw_token(struct vfs_token *vtoken); /* unlist + put */
+
+static inline struct vfs_token *vfs_token_get(struct vfs_token *vtoken)
+{
+ atomic_inc(&vtoken->usage); /* take a reference; vtoken must not be NULL */
+ return vtoken; /* handed back so the call can sit inside an expression */
+}
+
+extern void vfs_token_put(struct vfs_token *vtoken); /* drop a ref; NULL is ignored */
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_CRED_H */
diff -uNr linux-2.5.69/include/linux/fs.h linux-2.5.69-pag/include/linux/fs.h
--- linux-2.5.69/include/linux/fs.h 2003-05-13 11:02:22.000000000 +0100
+++ linux-2.5.69-pag/include/linux/fs.h 2003-05-13 11:02:35.000000000 +0100
@@ -430,6 +430,7 @@
         mode_t f_mode;
         loff_t f_pos;
         struct fown_struct f_owner;
+ struct vfs_token *f_token; /* governing credential */
         unsigned int f_uid, f_gid;
         int f_error;
         struct file_ra_state f_ra;
diff -uNr linux-2.5.69/include/linux/sched.h linux-2.5.69-pag/include/linux/sched.h
--- linux-2.5.69/include/linux/sched.h 2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-pag/include/linux/sched.h 2003-05-13 10:29:18.000000000 +0100
@@ -28,6 +28,7 @@
 #include <linux/completion.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
+#include <linux/cred.h>
 
 struct exec_domain;
 
@@ -387,6 +388,7 @@
         gid_t gid,egid,sgid,fsgid;
         int ngroups;
         gid_t groups[NGROUPS];
+ struct vfs_pag *vfspag;
         kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
         int keep_capabilities:1;
         struct user_struct *user;
diff -uNr linux-2.5.69/include/linux/types.h linux-2.5.69-pag/include/linux/types.h
--- linux-2.5.69/include/linux/types.h 2003-05-06 15:04:31.000000000 +0100
+++ linux-2.5.69-pag/include/linux/types.h 2003-05-12 10:19:08.000000000 +0100
@@ -24,6 +24,7 @@
 typedef __kernel_nlink_t nlink_t;
 typedef __kernel_off_t off_t;
 typedef __kernel_pid_t pid_t;
+typedef __kernel_pag_t pag_t;
 typedef __kernel_daddr_t daddr_t;
 typedef __kernel_key_t key_t;
 typedef __kernel_suseconds_t suseconds_t;
diff -uNr linux-2.5.69/init/main.c linux-2.5.69-pag/init/main.c
--- linux-2.5.69/init/main.c 2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-pag/init/main.c 2003-05-13 14:08:11.000000000 +0100
@@ -80,6 +80,7 @@
 extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void pte_chain_init(void);
+extern void credentials_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
 extern void populate_rootfs(void);
@@ -434,6 +435,7 @@
         pidmap_init();
         pgtable_cache_init();
         pte_chain_init();
+ credentials_init();
         fork_init(num_physpages);
         proc_caches_init();
         security_scaffolding_startup();
diff -uNr linux-2.5.69/kernel/cred.c linux-2.5.69-pag/kernel/cred.c
--- linux-2.5.69/kernel/cred.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.69-pag/kernel/cred.c 2003-05-14 11:37:36.000000000 +0100
@@ -0,0 +1,369 @@
+/* cred.c: authentication credentials management
+ *
+ * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+
+static kmem_cache_t *vfs_token_cache; /* slab of struct vfs_token */
+static kmem_cache_t *vfs_pag_cache; /* slab of struct vfs_pag */
+
+static struct rb_root vfs_pag_tree; /* all live PAGs, ordered by PAG ID */
+static spinlock_t vfs_pag_lock = SPIN_LOCK_UNLOCKED; /* guards tree + vfs_pag_next */
+static pag_t vfs_pag_next = 1; /* next PAG ID to try handing out */
+
+static void vfs_pag_init_once(void *_vfspag, kmem_cache_t * cachep,
+			      unsigned long flags)
+{
+	struct vfs_pag *vfspag = _vfspag;
+
+	flags &= SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
+	if (flags != SLAB_CTOR_CONSTRUCTOR)
+		return; /* act only on CONSTRUCTOR without VERIFY */
+	memset(vfspag, 0, sizeof(*vfspag));
+	INIT_LIST_HEAD(&vfspag->tokens); /* a fresh PAG holds no tokens */
+	rwlock_init(&vfspag->lock);
+}
+
+static void vfs_token_init_once(void *_vtoken, kmem_cache_t * cachep,
+				unsigned long flags)
+{
+	struct vfs_token *vtoken = _vtoken;
+
+	flags &= SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
+	if (flags != SLAB_CTOR_CONSTRUCTOR)
+		return; /* act only on CONSTRUCTOR without VERIFY */
+	memset(vtoken, 0, sizeof(*vtoken));
+	INIT_LIST_HEAD(&vtoken->link); /* empty link == not on any PAG list */
+}
+
+void __init credentials_init(void) /* boot-time setup of the PAG/token slabs */
+{
+ vfs_pag_cache = kmem_cache_create("vfs_pag", sizeof(struct vfs_pag),
+ 0, 0, vfs_pag_init_once, NULL); /* constructor given, last arg NULL */
+ if (!vfs_pag_cache)
+ panic("Cannot create vfs pag SLAB cache"); /* no fallback: give up */
+
+ vfs_token_cache = kmem_cache_create("vfs_token",
+ sizeof(struct vfs_token),
+ 0, 0, vfs_token_init_once, NULL); /* constructor given, last arg NULL */
+ if (!vfs_token_cache)
+ panic("Cannot create vfs token SLAB cache"); /* no fallback: give up */
+}
+
+/*
+ * join an existing PAG by ID (needs CAP_SYS_ADMIN unless already a member)
+ * - returns the PAG ID on success, -EPERM or -ENOENT on failure
+ */
+inline pag_t vfs_join_pag(pag_t pag)
+{
+	struct task_struct *tsk = current;
+	struct vfs_pag *candidate, *old;
+	struct rb_node *n;
+
+	/* already a member - nothing to do, and no privilege needed */
+	if (tsk->vfspag && tsk->vfspag->pag == pag)
+		return pag;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	spin_lock(&vfs_pag_lock);
+
+	/* ordinary descent of the PAG tree, which is keyed on the PAG ID */
+	n = vfs_pag_tree.rb_node;
+	while (n) {
+		candidate = rb_entry(n, struct vfs_pag, node);
+		if (pag == candidate->pag)
+			break;
+		n = pag < candidate->pag ? n->rb_left : n->rb_right;
+	}
+
+	if (!n) {
+		spin_unlock(&vfs_pag_lock);
+		return -ENOENT;
+	}
+
+	/* the new reference is taken before the tree lock is let go */
+	old = xchg(&tsk->vfspag, vfs_pag_get(candidate));
+	spin_unlock(&vfs_pag_lock);
+
+	if (old)
+		vfs_pag_put(old);
+	return pag;
+}
+
+/* take the current process out of its PAG, if it has one; always returns 0 */
+inline pag_t vfs_leave_pag(void)
+{
+	struct vfs_pag *old = xchg(&current->vfspag, NULL);
+
+	vfs_pag_put(old); /* copes with old == NULL */
+
+	return 0;
+}
+
+/*
+ * create a new PAG with an unused ID and move the current process into it
+ * - returns the new PAG's ID, or -ENOMEM if the PAG record can't be allocated
+ * - the reference on any PAG the process previously belonged to is dropped
+ * - allocates with SLAB_KERNEL, so the caller must be able to sleep
+ */
+inline pag_t vfs_new_pag(void)
+{
+	struct vfs_pag *vfspag, *xvfspag;
+	struct rb_node **p, *parent;
+
+	vfspag = kmem_cache_alloc(vfs_pag_cache, SLAB_KERNEL);
+	if (!vfspag)
+		return -ENOMEM;
+
+	/* this initial reference is the one the current process will hold */
+	atomic_set(&vfspag->usage, 1);
+
+	spin_lock(&vfs_pag_lock);
+
+	/*
+	 * pick candidate IDs until one is found that isn't in the tree
+	 * - each attempt searches afresh from the root, so that when the inner
+	 *   loop runs off the bottom of the tree, parent and p describe the
+	 *   empty slot where a node with this ID belongs
+	 * - a collision is only possible once the ID counter has wrapped
+	 */
+	do {
+		/*
+		 * IDs must be +ve and >0, so when the counter wraps, reset
+		 * the counter itself to 1 rather than just patching up the
+		 * one ID being handed out, which would leave the counter
+		 * stuck in the invalid range for later callers
+		 */
+		vfspag->pag = vfs_pag_next++;
+		if (vfs_pag_next < 1)
+			vfs_pag_next = 1;
+
+		parent = NULL;
+		p = &vfs_pag_tree.rb_node;
+
+		while (*p) {
+			parent = *p;
+			xvfspag = rb_entry(parent, struct vfs_pag, node);
+
+			if (vfspag->pag < xvfspag->pag)
+				p = &(*p)->rb_left;
+			else if (vfspag->pag > xvfspag->pag)
+				p = &(*p)->rb_right;
+			else
+				break; /* ID already in use - try the next */
+		}
+	} while (*p);
+
+	/* the search ended on an empty link, so the new node can be hung
+	 * there without displacing anything */
+	rb_link_node(&vfspag->node, parent, p);
+	rb_insert_color(&vfspag->node, &vfs_pag_tree);
+	spin_unlock(&vfs_pag_lock);
+
+	/* switch PAGs, then release the old one outside the spinlock since
+	 * vfs_pag_put() may need to take vfs_pag_lock itself */
+	xvfspag = xchg(&current->vfspag, vfspag);
+	if (xvfspag)
+		vfs_pag_put(xvfspag);
+
+	return vfspag->pag;
+}
+
+/*
+ * set the caller's PAG: >0 joins that PAG, 0 leaves any PAG, -1 makes and
+ * joins a new one; returns the PAG ID now in force (0 = none) or -ve error
+ */
+long sys_setpag(pag_t pag)
+{
+	if (pag < -1)
+		return -EINVAL;
+	if (pag > 0)
+		return vfs_join_pag(pag);
+	return pag ? vfs_new_pag() : vfs_leave_pag();
+}
+
+/* report the ID of the calling process's PAG, or 0 when it has no PAG */
+long sys_getpag(void)
+{
+	struct vfs_pag *mine = current->vfspag;
+
+	if (!mine)
+		return 0;
+	return mine->pag;
+}
+
+/*
+ * release a reference on a VFS pag (NULL is ignored); when the last one goes,
+ * unhook the pag from the tree, drop every token still listed and free it
+ */
+void vfs_pag_put(struct vfs_pag *vfspag)
+{
+	struct list_head *pending;
+	struct vfs_token *vtoken;
+
+	if (!vfspag || !atomic_dec_and_lock(&vfspag->usage, &vfs_pag_lock))
+		return;
+
+	rb_erase(&vfspag->node, &vfs_pag_tree);
+	spin_unlock(&vfs_pag_lock);
+
+	for (pending = &vfspag->tokens; !list_empty(pending); ) {
+		vtoken = list_entry(pending->next, struct vfs_token, link);
+		list_del_init(&vtoken->link);
+		vfs_token_put(vtoken);
+	}
+	kmem_cache_free(vfs_pag_cache, vfspag);
+}
+
+/* drop a ref on a VFS token (NULL is a no-op); the last put frees the blob
+ * and hands the token back to the token slab it was allocated from
+ */
+void vfs_token_put(struct vfs_token *vtoken)
+{
+	if (vtoken && atomic_dec_and_test(&vtoken->usage)) {
+		kfree(vtoken->blob);
+		kmem_cache_free(vfs_token_cache, vtoken);
+	}
+}
+
+/*
+ * add a token (blob = fsname\0 + key + data) to current's PAG; returns 0 with
+ * *_vtoken set, -EACCES (no PAG), -EINVAL (name + key > 64K-1) or -ENOMEM
+ */
+int vfs_pag_add_token(const char *fsname,
+		      unsigned short klen,
+		      const void *key,
+		      size_t dlen,
+		      const void *data,
+		      struct vfs_token **_vtoken)
+{
+	struct vfs_token *vtoken;
+	struct vfs_pag *vfspag = current->vfspag;
+	size_t nlen;
+
+	*_vtoken = NULL;
+	if (!vfspag)
+		return -EACCES;
+	nlen = strlen(fsname) + 1; /* the name plus its terminating NUL */
+	if (nlen + klen > 0xffff) /* k_off and d_off are unsigned short */
+		return -EINVAL;
+
+	vtoken = kmem_cache_alloc(vfs_token_cache, SLAB_KERNEL);
+	if (!vtoken)
+		return -ENOMEM;
+
+	vtoken->k_off = nlen;
+	vtoken->d_off = vtoken->k_off + klen;
+	vtoken->size = vtoken->d_off + dlen;
+	vtoken->blob = kmalloc(vtoken->size, SLAB_KERNEL);
+	if (!vtoken->blob) {
+		kmem_cache_free(vfs_token_cache, vtoken); /* slab object */
+		return -ENOMEM;
+	}
+	atomic_set(&vtoken->usage, 1); /* NOTE(review): list and caller share it */
+	memcpy(vtoken->blob, fsname, vtoken->k_off);
+	memcpy(vtoken->blob + vtoken->k_off, key, klen);
+	memcpy(vtoken->blob + vtoken->d_off, data, dlen);
+
+	write_lock(&vfspag->lock);
+	list_add_tail(&vtoken->link, &vfspag->tokens);
+	write_unlock(&vfspag->lock);
+	*_vtoken = vtoken;
+	return 0;
+}
+
+EXPORT_SYMBOL(vfs_pag_add_token);
+
+/*
+ * look through the current process's PAG for a token whose filesystem name
+ * and key both match; a hit is returned with an extra reference held
+ */
+struct vfs_token *vfs_pag_find_token(const char *fsname,
+				     unsigned short klen,
+				     const void *key)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+	struct vfs_token *cursor, *match = NULL;
+
+	if (!vfspag)
+		return NULL;
+
+	read_lock(&vfspag->lock);
+	list_for_each_entry(cursor, &vfspag->tokens, link) {
+		if (cursor->d_off - cursor->k_off != klen)
+			continue;
+		if (strcmp(cursor->blob, fsname) != 0)
+			continue;
+		if (memcmp(cursor->blob + cursor->k_off, key, klen) != 0)
+			continue;
+		/* pin the token before the list lock is dropped */
+		match = vfs_token_get(cursor);
+		break;
+	}
+	read_unlock(&vfspag->lock);
+
+	return match;
+}
+
+EXPORT_SYMBOL(vfs_pag_find_token);
+
+/* withdraw a token from the current PAG's list and drop the list's reference;
+ * a token that is already off-list is left alone so its count isn't underrun */
+void vfs_pag_withdraw_token(struct vfs_token *vtoken)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+	int listed;
+
+	if (!vfspag)
+		return;
+	write_lock(&vfspag->lock);
+	listed = is_vfs_token_valid(vtoken); /* sampled under the list lock */
+	list_del_init(&vtoken->link);
+	write_unlock(&vfspag->lock);
+	if (listed)
+		vfs_token_put(vtoken);
+}
+
+EXPORT_SYMBOL(vfs_pag_withdraw_token);
+
+/*
+ * strip every token belonging to the named filesystem out of the current
+ * process's PAG, putting each token as it is taken off the list
+ */
+void vfs_unpag(const char *fsname)
+{
+	struct vfs_pag *vfspag = current->vfspag;
+	struct list_head *pos, *next;
+	struct vfs_token *victim;
+
+	if (!vfspag)
+		return;
+
+	write_lock(&vfspag->lock);
+	list_for_each_safe(pos, next, &vfspag->tokens) {
+		victim = list_entry(pos, struct vfs_token, link);
+		if (strcmp(fsname, victim->blob) != 0)
+			continue;
+
+		list_del_init(&victim->link);
+		vfs_token_put(victim);
+	}
+
+	write_unlock(&vfspag->lock);
+}
diff -uNr linux-2.5.69/kernel/fork.c linux-2.5.69-pag/kernel/fork.c
--- linux-2.5.69/kernel/fork.c 2003-05-06 15:07:12.000000000 +0100
+++ linux-2.5.69-pag/kernel/fork.c 2003-05-14 11:12:12.000000000 +0100
@@ -884,6 +884,10 @@
 
         if (clone_flags & CLONE_CHILD_SETTID)
                 p->set_child_tid = child_tidptr;
+
+ if (p->vfspag)
+ vfs_pag_get(p->vfspag);
+
         /*
          * Clear TID on mm_release()?
          */
diff -uNr linux-2.5.69/kernel/Makefile linux-2.5.69-pag/kernel/Makefile
--- linux-2.5.69/kernel/Makefile 2003-05-06 15:04:56.000000000 +0100
+++ linux-2.5.69-pag/kernel/Makefile 2003-05-13 10:45:27.000000000 +0100
@@ -3,7 +3,7 @@
 #
 
 obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
- exit.o itimer.o time.o softirq.o resource.o \
+ cred.o exit.o itimer.o time.o softirq.o resource.o \
             sysctl.o capability.o ptrace.o timer.o user.o \
             signal.o sys.o kmod.o workqueue.o futex.o pid.o \
             rcupdate.o intermodule.o extable.o params.o posix-timers.o
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu May 15 2003 - 22:00:51 EST