asm/uaccess.h reimplementation, patch. [was: Re: 2.1 kernel bloat revisited]

Ingo Molnar (mingo@pc5829.hil.siemens.at)
Sun, 30 Mar 1997 12:14:09 +0200 (MET DST)


[repost]

On Sat, 29 Mar 1997 kdp0101@hpmail.lrz-muenchen.de wrote:

> The big increase in the _ioctl() functions seems to suggest that the
> new _user() user memory access primitives generate much more code
> than the 2.0 routines.

a simple access_ok() inline is ~30 bytes of code.

The problem is that we have to check two things at runtime (roughly as
in the sketch below):

1) is this call done from kernel space?
2) if not, is the user-space pointer really in user space?
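
a simplified sketch of the conventional check, in C [not the exact
2.1.30 macro; the 'from_kernel' flag just stands in for the get_fs()
test, and 0xC0000000 is the current 3G user-space limit, as in the old
code below]:

static inline int
old_style_access_ok(int from_kernel, unsigned long addr, unsigned long size)
{
	if (from_kernel)			/* 1) call done from kernel space? */
		return 1;
	if (addr >= 0xC0000000UL)		/* 2) pointer in user space at all? */
		return 0;
	return size <= 0xC0000000UL - addr;	/*    ... and the whole range, too */
}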

i've proposed one solution to this problem, but unfortunately it didn't
work on 32-bit setups with more than 2G of physical memory.

but i have a new idea ;)

what about doing this:

ptr &= current->mem_mask;

where mem_mask depends on what 'mode' we are in: user or kernel. This
restricts user-space virtual memory sizes to powers of two (2^N) ...
Dave, is this acceptable on Sparc? [the trick is to make the user-space
size 2^N, not the physical memory size ... thus we can have 3.5G physical
memory setups too: 512M user-space virtual memory, 3.5G physical memory]
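
[ in C the idea is roughly this; just a sketch, in the attached header
the mask lives in the task struct and is computed from USER_MEM_BITS: ]

#define KERNEL_MEM_MASK	(~0UL)			/* AND becomes a no-op in kernel mode */
#define USER_MEM_MASK	((1UL << 31) - 1)	/* example: 2G of user-space VM */

static inline unsigned long mask_pointer(unsigned long ptr, unsigned long mem_mask)
{
	return ptr & mem_mask;	/* one AND, no branch: illegal high bits get cut off */
}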

set_fs()/get_fs() modify current->mem_mask. [but see the attached code
for an alternative, cleaner interface]

an example:

if (!access_ok(type, addr, size)) return -EFAULT;

old code:

000018c8 <test_test_test+4> movl 0x0,%eax
000018cd <test_test_test+9> testb $0x3,0x2d8(%eax)
000018d4 <test_test_test+10> je 000018f4 <test_test_test+30>
000018d6 <test_test_test+12> cmpl $0xc0000000,%edx
000018dc <test_test_test+18> ja 000018eb <test_test_test+27>
000018de <test_test_test+1a> movl $0xc0000000,%eax
000018e3 <test_test_test+1f> subl %edx,%eax
000018e5 <test_test_test+21> cmpl %eax,0x4(%esp,1)
000018e9 <test_test_test+25> jbe 000018f4 <test_test_test+30>
000018eb <test_test_test+27> movl $0xfffffff2,%eax
000018f0 <test_test_test+2c> ret
000018f4 <test_test_test+30>

new code:

000018c8 <test_test_test+4> movl 0x0,%eax
000018cd <test_test_test+9> andl 0x2d8(%eax), %edx

No conditional jumps, small memory footprint. This solution restricts
the user-space memory size to 2G, 1G, 512M, 256M, 128M, 64M ... etc.
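
[ for reference, the masks these sizes map to, using the formula from
the attached header (the MASK_* names are only for illustration): ]

/* mask = 0xffffffff >> (32 - USER_MEM_BITS) */
#define MASK_2G		0x7fffffffUL	/* USER_MEM_BITS = 31 */
#define MASK_1G		0x3fffffffUL	/* USER_MEM_BITS = 30 */
#define MASK_512M	0x1fffffffUL	/* USER_MEM_BITS = 29 */
#define MASK_256M	0x0fffffffUL	/* USER_MEM_BITS = 28 */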

I've attached an experimental asm-i386/uaccess.h for 2.1.30 that
implements the first variant. The code doesn't run yet as i've still got
to sort out all x86 code that relies on 0xC0000000. [but we need this
code quite soon anyway, 2-gig x86 boxes are already available]

about -EFAULT behaviour: 'exact' behaviour can be made a config option,
or can be done in user-space (libc, optionally), if desired. [see the
sketch below]
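
[ a hypothetical sketch of such an optional libc-side wrapper; the
function name and the limit constant are made up, nothing here is part
of the patch: ]

#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

#define USER_SPACE_LIMIT 0x7fffffffUL	/* assumed 2G user-space configuration */

ssize_t checked_write(int fd, const void *buf, size_t count)
{
	unsigned long addr = (unsigned long)buf;

	if (addr > USER_SPACE_LIMIT || count > USER_SPACE_LIMIT - addr) {
		errno = EFAULT;		/* 'exact' -EFAULT behaviour, done in user space */
		return -1;
	}
	return write(fd, buf, count);
}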

[ and we need Jakub's MMU trick to get rid of size checks, which is
already implemented on the Sparc ... i will code that up too in no time
if you guys like this solution ]

[ i've added a natural migration path away from the get_fs() set_fs()
ugliness: set_kernel_mode(), set_user_mode(), get_mode(), etc. ... see
the sketch below ]
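
[ e.g. a typical set_fs(KERNEL_DS)/set_fs(old_fs) sequence could then
look like this; only a sketch, do_with_kernel_pointers() and 'fn' are
made-up names, the *_mode() macros are from the attached header: ]

static int do_with_kernel_pointers(int (*fn)(unsigned long), unsigned long arg)
{
	unsigned long old_mask = get_mode();	/* was: old_fs = get_fs();  */
	int err;

	set_kernel_mode();			/* was: set_fs(KERNEL_DS);  */
	err = fn(arg);				/* fn may do get_user()/put_user() on 'arg' */
	current->mem_mask = old_mask;		/* was: set_fs(old_fs);     */
	return err;
}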

Comments?

-- mingo

------------------------------------------------------------------------>
#ifndef __i386_UACCESS_H
#define __i386_UACCESS_H

/*
* User space memory access functions
*/
#include <linux/sched.h>
#include <asm/segment.h>

#define VERIFY_READ 0
#define VERIFY_WRITE 1

/*
* This is the number of address bits available for user-space virtual
* memory. This is limited to power of two sizes.
*
* [but note that we can support any physical memory size, reasonable
* size is up to ... ~3.5 GBytes]
*
* Currently we define user-space memory as 1 GB (USER_MEM_BITS = 30):
*/

#define USER_MEM_BITS 30

#define USER_MEM_MASK (0xffffffff >> (32-USER_MEM_BITS))
#define KERNEL_MEM_MASK 0xffffffff

/*
* access_ok() currently has the side effect of cutting off
* illegal bits from a user-space pointer. If you need exact
* -EFAULT behaviour, then use the available libc wrapper that
* checks for illegal parameters in system calls.
*
* Posix says -EFAULT behaviour is undefined.
*
* [ The code expects that Jakub's MMU trick catches overwriters.
* (thus no need to check size) ]
*/

#define access_ok(type,addr,size) \
({((unsigned long)(addr) &= current->mem_mask); 1;})

#define __addr_ok(x) (!((unsigned int)(x)&(~current->mem_mask)))

extern inline int verify_area(int type, const void * addr, unsigned long size)
{
return access_ok(type,addr,size) ? 0 : -EFAULT;
}

/*
* this is the preferred way of telling the kernel that we are
* doing system calls within the kernel (thus no address space
* protection is needed):
*/

#define set_kernel_mode() (current->mem_mask=KERNEL_MEM_MASK)
#define set_user_mode() (current->mem_mask=USER_MEM_MASK)
#define get_mode() (current->mem_mask)

/*
* Old compatibility functions; they will vanish after old code is
* converted to the new *_mode() interface:
*/

#define get_fs() (!~current->mem_mask?KERNEL_DS:USER_DS)

#define set_fs(x) \
do { \
if (x==KERNEL_DS) \
current->mem_mask=KERNEL_MEM_MASK; \
else \
current->mem_mask=USER_MEM_MASK; \
} while(0)

#define get_ds() (KERNEL_DS)

/*
* Used for broken supervisor-mode protection checking in i386 CPUs:
* (FIXME: implement the #ifdef's for access_ok(x))
*/

extern int __verify_write(const void *, unsigned long);

/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
* the address at which the program should continue. No registers are
* modified, so it is entirely up to the continuation code to figure out
* what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
* we don't even have to jump over them. Further, they do not intrude
* on our cache or tlb entries.
*/

struct exception_table_entry
{
unsigned long insn, fixup;
};

/* Returns 0 if exception not found and fixup otherwise. */
extern unsigned long search_exception_table(unsigned long);

/*
* These are the main single-value transfer routines. They automatically
* use the right size if we just have the right pointer type.
*
* This gets kind of ugly. We want to return _two_ values in "get_user()"
* and yet we don't want to do any pointers, because that is too much
* of a performance impact. Thus we have a few rather ugly macros here,
* and hide all the ugliness from the user.
*
* The "__xxx" versions of the user access functions are versions that
* do not verify the address space, that must have been done previously
* with a separate "access_ok()" call (this is used when we do multiple
* accesses to the same area of user memory).
*/
#define get_user(x,ptr) \
__get_user_check((x),(ptr),sizeof(*(ptr)))
#define put_user(x,ptr) \
__put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))

#define __get_user(x,ptr) \
__get_user_nocheck((x),(ptr),sizeof(*(ptr)))
#define __put_user(x,ptr) \
__put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))

/*
* The "xxx_ret" versions return the constant specified in the third
* argument if something bad happens. These macros can be optimized for
* the case of just returning from the function in which xxx_ret is used.
*/

#define put_user_ret(x,ptr,ret) ({ \
if (put_user(x,ptr)) return ret; })

#define get_user_ret(x,ptr,ret) ({ \
if (get_user(x,ptr)) return ret; })

#define __put_user_ret(x,ptr,ret) ({ \
if (__put_user(x,ptr)) return ret; })

#define __get_user_ret(x,ptr,ret) ({ \
if (__get_user(x,ptr)) return ret; })

extern long __put_user_bad(void);

#define __put_user_nocheck(x,ptr,size) \
({ \
long __pu_err; \
__put_user_size((x),(ptr),(size),__pu_err); \
__pu_err; \
})

#define __put_user_check(x,ptr,size) \
({ \
long __pu_err = -EFAULT; \
__typeof__(*(ptr)) *__pu_addr = (ptr); \
if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
__put_user_size((x),__pu_addr,(size),__pu_err); \
__pu_err; \
})

#define __put_user_size(x,ptr,size,retval) \
do { \
retval = 0; \
switch (size) { \
case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \
case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \
case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \
default: __put_user_bad(); \
} \
} while (0)

struct __large_struct { unsigned long buf[100]; };
#define __m(x) (*(struct __large_struct *)(x))

/*
* Tell gcc we read from memory instead of writing: this is because
* we do not write to any memory gcc knows about, so there are no
* aliasing issues.
*/
#define __put_user_asm(x, addr, err, itype, rtype, ltype) \
__asm__ __volatile__( \
"1: mov"itype" %"rtype"1,%2\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl %3,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 1b,3b\n" \
".previous" \
: "=r"(err) \
: ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))

#define __get_user_nocheck(x,ptr,size) \
({ \
long __gu_err, __gu_val; \
__get_user_size(__gu_val,(ptr),(size),__gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
})

#define __get_user_check(x,ptr,size) \
({ \
long __gu_err = -EFAULT, __gu_val = 0; \
const __typeof__(*(ptr)) *__gu_addr = (ptr); \
if (access_ok(VERIFY_READ,__gu_addr,size)) \
__get_user_size(__gu_val,__gu_addr,(size),__gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
})

extern long __get_user_bad(void);

#define __get_user_size(x,ptr,size,retval) \
do { \
retval = 0; \
switch (size) { \
case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \
case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \
case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)

#define __get_user_asm(x, addr, err, itype, rtype, ltype) \
__asm__ __volatile__( \
"1: mov"itype" %2,%"rtype"1\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl %3,%0\n" \
" xor"itype" %"rtype"1,%"rtype"1\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 1b,3b\n" \
".previous" \
: "=r"(err), ltype (x) \
: "m"(__m(addr)), "i"(-EFAULT), "0"(err))

/*
* Copy To/From Userspace
*/

/* Generic arbitrary sized copy. */
#define __copy_user(to,from,size) \
__asm__ __volatile__( \
"0: rep; movsl\n" \
" movl %1,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%1,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
: "=c"(size) \
: "r"(size & 3), "0"(size / 4), "D"(to), "S"(from) \
: "di", "si", "memory")

/* Optimize just a little bit when we know the size of the move. */
#define __constant_copy_user(to, from, size) \
do { \
switch (size & 3) { \
default: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1:\n" \
".section .fixup,\"ax\"\n" \
"2: shl $2,%0\n" \
" jmp 1b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,2b\n" \
".previous" \
: "=c"(size) \
: "S"(from), "D"(to), "0"(size/4) \
: "di", "si", "memory"); \
break; \
case 1: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: shl $2,%0\n" \
"4: incl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size) \
: "S"(from), "D"(to), "0"(size/4) \
: "di", "si", "memory"); \
break; \
case 2: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: shl $2,%0\n" \
"4: addl $2,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size) \
: "S"(from), "D"(to), "0"(size/4) \
: "di", "si", "memory"); \
break; \
case 3: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2: movsb\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: shl $2,%0\n" \
"5: addl $2,%0\n" \
"6: incl %0\n" \
" jmp 3b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,4b\n" \
" .long 1b,5b\n" \
" .long 2b,6b\n" \
".previous" \
: "=c"(size) \
: "S"(from), "D"(to), "0"(size/4) \
: "di", "si", "memory"); \
break; \
} \
} while (0)

static inline unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
__copy_user(to,from,n);
return n;
}

static inline unsigned long
__constant_copy_to_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
__constant_copy_user(to,from,n);
return n;
}

static inline unsigned long
__generic_copy_from_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
__copy_user(to,from,n);
return n;
}

static inline unsigned long
__constant_copy_from_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
__constant_copy_user(to,from,n);
return n;
}

static inline unsigned long
__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
{
__copy_user(to,from,n);
return n;
}

static inline unsigned long
__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
{
__constant_copy_user(to,from,n);
return n;
}

static inline unsigned long
__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
{
__copy_user(to,from,n);
return n;
}

static inline unsigned long
__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
{
__constant_copy_user(to,from,n);
return n;
}

#define copy_to_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_to_user((to),(from),(n)) : \
__generic_copy_to_user((to),(from),(n)))

#define copy_from_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_from_user((to),(from),(n)) : \
__generic_copy_from_user((to),(from),(n)))

#define copy_to_user_ret(to,from,n,retval) ({ \
if (copy_to_user(to,from,n)) \
return retval; \
})

#define copy_from_user_ret(to,from,n,retval) ({ \
if (copy_from_user(to,from,n)) \
return retval; \
})

#define __copy_to_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_to_user_nocheck((to),(from),(n)) : \
__generic_copy_to_user_nocheck((to),(from),(n)))

#define __copy_from_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_from_user_nocheck((to),(from),(n)) : \
__generic_copy_from_user_nocheck((to),(from),(n)))

/*
* Zero Userspace
*/

#define __do_clear_user(addr,size) \
__asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %1,%0\n" \
"1: rep; stosb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%1,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
: "=c"(size) \
: "r"(size & 3), "0"(size / 4), "D"(addr), "a"(0) \
: "di")

static inline unsigned long
clear_user(void *to, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
}

static inline unsigned long
__clear_user(void *to, unsigned long n)
{
__do_clear_user(to, n);
return n;
}

/*
* Copy a null terminated string from userspace.
*/

#define __do_strncpy_from_user(dst,src,count,res) \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
"0: lodsb\n" \
" stosb\n" \
" testb %%al,%%al\n" \
" jz 1f\n" \
" decl %1\n" \
" jnz 0b\n" \
"1: subl %1,%0\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl %2,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
".previous" \
: "=d"(res), "=c"(count) \
: "i"(-EFAULT), "0"(count), "1"(count), "S"(src), "D"(dst) \
: "si", "di", "ax", "memory")

static inline long
__strncpy_from_user(char *dst, const char *src, long count)
{
long res;
__do_strncpy_from_user(dst, src, count, res);
return res;
}

static inline long
strncpy_from_user(char *dst, const char *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
__do_strncpy_from_user(dst, src, count, res);
return res;
}

/*
* Return the size of a string (including the ending 0)
*
* Return 0 for error
*/

extern inline long strlen_user(const char *s)
{
unsigned long res;

__asm__ __volatile__(
"0: repne; scasb\n"
" notl %0\n"
"1:\n"
".section .fixup,\"ax\"\n"
"2: xorl %0,%0\n"
" jmp 1b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,2b\n"
".previous"
:"=c" (res), "=D" (s)
:"1" (s), "a" (0), "0" (-__addr_ok(s)));
return res & -__addr_ok(s);
}

#endif /* __i386_UACCESS_H */