The -O2 optimization moves code outside of a loop, causing a function call to be made after the data structure was unlocked. The code was actually moved to after the second use of the variable that is assigned the return value of the indirect function call. The problem does not occur when the optimization level is decreased by using -O.
I have checked this on the 386 version of the compiler and the code movement does not occur. Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/3.2.2/specs $ gcc -v (i386 compiler) Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --enable-threads=posix --disable-checking --with-system-zlib --enable-__cxa_atexit --host=i386-redhat-linux Thread model: posix gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) The cross compiler version is: $ ppc4xx-linux-gcc -v Reading specs from /opt/timesys/linux/4.0/toolchains/ppc4xx-linux/lib/gcc-lib/powerpc-linux/3.2/specs Configured with: ../package/configure --build=i686-linux --host=i686-linux --target=powerpc-linux --prefix=/opt/timesys/linux/4.0/toolchains/ppc4xx-linux --enable-threads=posix --program-transform-name='s,^,ppc4xx-linux-,' --enable-languages=c,c++ --enable-shared --with-gnu-as --with-gnu-ld --nfp --with-stabs Thread model: posix gcc version 3.2 The uname information is: Linux gard8000 2.4.20-8 #1 Thu Mar 13 17:54:28 EST 2003 i686 i686 i386 GNU/Linux System Info $ cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Pentium(R) 4 CPU 2.26GHz stepping : 9 cpu MHz : 2261.049 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm bogomips : 4508.87 $ cat /proc/meminfo total: used: free: shared: buffers: cached: Mem: 525639680 510992384 14647296 0 90320896 367050752 Swap: 1069244416 98328576 970915840 MemTotal: 513320 kB MemFree: 14304 kB MemShared: 0 kB Buffers: 88204 kB Cached: 337652 kB SwapCached: 20796 kB Active: 372652 kB ActiveAnon: 26032 kB ActiveCache: 346620 kB Inact_dirty: 0 kB Inact_laundry: 69508 kB Inact_clean: 10852 kB Inact_target: 90600 kB HighTotal: 0 kB HighFree: 0 kB LowTotal: 513320 kB 
LowFree: 14304 kB SwapTotal: 1044184 kB SwapFree: 948160 kB $ cat /proc/version Linux version 2.4.20-8 ([EMAIL PROTECTED]) (gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5)) #1 Thu Mar 13 17:54:28 EST 2003 The compilation lines for the error is in the file COMP_BUG, $ cat COMP_BUG ppc4xx-linux-gcc -save-temps \ -D__KERNEL__ \ -I/opt/timesys/linux/4.0/ep405/src/2.4.18-timesys-4.0/kernel/include \ -Wstrict-prototypes \ -Wno-trigraphs \ -O2 \ -fno-strict-aliasing \ -fno-common \ -finline-limit=5000 \ -fomit-frame-pointer \ -D__powerpc__ \ -fsigned-char \ -msoft-float \ -pipe \ -ffixed-r2 \ -Wno-uninitialized \ -mmultiple \ -mstring \ -I/opt/timesys/linux/4.0/ep405/src/2.4.18-timesys-4.0/kernel/arch/ppc \ -mcpu=405 \ -Wa,-m405 \ -Werror \ -g3 \ -DKBUILD_BASENAME=select \ -c \ -o select.o select.c and the compilation line generating code without the error is the file NOCOMP_BUG. $ cat NOCOMP_BUG ppc4xx-linux-gcc \ -D__KERNEL__ \ -I/opt/timesys/linux/4.0/ep405/src/2.4.18-timesys-4.0/kernel/include \ -Wstrict-prototypes \ -Wno-trigraphs \ -O \ -fno-strict-aliasing \ -fno-common \ -finline-limit=5000 \ -fomit-frame-pointer \ -D__powerpc__ \ -fsigned-char \ -msoft-float \ -pipe \ -ffixed-r2 \ -Wno-uninitialized \ -mmultiple \ -mstring \ -I/opt/timesys/linux/4.0/ep405/src/2.4.18-timesys-4.0/kernel/arch/ppc \ -mcpu=405 \ -Wa,-m405 \ -Werror \ -g3 \ -DKBUILD_BASENAME=select \ -c \ -o select.o select.c The source file is fs/select.c which is from the timesys linux 2.4.18-timesys4.0 kernel. The source file and headers follow. I have included all the header files for successful compilation. My contact information is: Jeff Fellin RFL Electronics 973-334-3100 x327 [EMAIL PROTECTED] ========= select.c ============================================ /* * This file contains the procedures for the handling of select and poll * * Created for Linux based loosely upon Mathius Lattner's minix * patches by Peter MacDonald. Heavily edited by Linus. 
* * 4 February 1994 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS * flag set in its personality we do *not* modify the given timeout * parameter to reflect time remaining. * * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). */ #include "slab.h" #include "smp_lock.h" #include "poll.h" #include "personality.h" /* for STICKY_TIMEOUTS */ #include "file.h" #include "timespec.h" #include "uaccess.h" #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) static void do_pollfd(unsigned int num, struct pollfd * fdpage, poll_table ** pwait, int *count) { int i; for (i = 0; i < num; i++) { int fd; unsigned int mask; struct pollfd *fdp; mask = 0; fdp = fdpage+i; fd = fdp->fd; if (fd >= 0) { struct file * file = fget(fd); mask = POLLNVAL; if (file != NULL) { mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) mask = file->f_op->poll(file, *pwait); mask &= fdp->events | POLLERR | POLLHUP; fput(file); } if (mask) { *pwait = NULL; (*count)++; } } fdp->revents = mask; } } =============================================================== ========= file.h ============================================== /* * Wrapper functions for accessing the file_struct fd array. 
*/ #ifndef __LINUX_FILE_H #define __LINUX_FILE_H extern void FASTCALL(fput(struct file *)); extern struct file * FASTCALL(fget(unsigned int fd)); static inline int get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; int res; read_lock(&files->file_lock); res = FD_ISSET(fd, files->close_on_exec); read_unlock(&files->file_lock); return res; } static inline void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; write_lock(&files->file_lock); if (flag) FD_SET(fd, files->close_on_exec); else FD_CLR(fd, files->close_on_exec); write_unlock(&files->file_lock); } static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; if (fd < (unsigned int) files->max_fds) file = files->fd[fd]; return file; } /* * Check whether the specified fd has an open file. */ static inline struct file * fcheck(unsigned int fd) { struct file * file = NULL; struct files_struct *files = current->files; if (fd < (unsigned int) files->max_fds) file = files->fd[fd]; return file; } extern void put_filp(struct file *); extern int get_unused_fd(void); static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) { FD_CLR(fd, files->open_fds); if (fd < (unsigned int) files->next_fd) files->next_fd = fd; } static inline void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; write_lock(&files->file_lock); __put_unused_fd(files, fd); write_unlock(&files->file_lock); } /* * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file * array. At any such point, we are vulnerable to a dup2() race * installing a file in the array before us. We need to detect this and * fput() the struct file we are about to overwrite in this case. * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. 
*/ static inline void fd_install(unsigned int fd, struct file * file) { struct files_struct *files = current->files; write_lock(&files->file_lock); if (files->fd[fd]) BUG(); files->fd[fd] = file; write_unlock(&files->file_lock); } void put_files_struct(struct files_struct *fs); #endif /* __LINUX_FILE_H */ =============================================================== ========= personality.h ======================================= #ifndef _LINUX_PERSONALITY_H #define _LINUX_PERSONALITY_H /* * Handling of different ABIs (personalities). */ struct exec_domain; struct pt_regs; extern int register_exec_domain(struct exec_domain *); extern int unregister_exec_domain(struct exec_domain *); extern int __set_personality(unsigned long); /* * Sysctl variables related to binary emulation. */ extern unsigned long abi_defhandler_coff; extern unsigned long abi_defhandler_elf; extern unsigned long abi_defhandler_lcall7; extern unsigned long abi_defhandler_libcso; extern int abi_fake_utsname; /* * Flags for bug emulation. * * These occupy the top three bytes. */ enum { MMAP_PAGE_ZERO = 0x0100000, ADDR_LIMIT_32BIT = 0x0800000, SHORT_INODE = 0x1000000, WHOLE_SECONDS = 0x2000000, STICKY_TIMEOUTS = 0x4000000, }; /* * Personality types. * * These go in the low byte. Avoid using the top bit, it will * conflict with error returns. 
*/ enum { PER_LINUX = 0x0000, PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT, PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE, PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE, PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS, PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE, PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS, PER_BSD = 0x0006, PER_SUNOS = 0x0006 | STICKY_TIMEOUTS, PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE, PER_LINUX32 = 0x0008, PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */ PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */ PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */ PER_RISCOS = 0x000c, PER_SOLARIS = 0x000d | STICKY_TIMEOUTS, PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, PER_MASK = 0x00ff, }; /* * Description of an execution domain. * * The first two members are refernced from assembly source * and should stay where they are unless explicitly needed. */ typedef void (*handler_t)(int, struct pt_regs *); struct exec_domain { const char *name; /* name of the execdomain */ handler_t handler; /* handler for syscalls */ unsigned char pers_low; /* lowest personality */ unsigned char pers_high; /* highest personality */ unsigned long *signal_map; /* signal mapping */ unsigned long *signal_invmap; /* reverse signal mapping */ struct map_segment *err_map; /* error mapping */ struct map_segment *socktype_map; /* socket type mapping */ struct map_segment *sockopt_map; /* socket option mapping */ struct map_segment *af_map; /* address family mapping */ struct module *module; /* module context of the ed. */ struct exec_domain *next; /* linked list (internal) */ }; /* * Return the base personality without flags. */ #define personality(pers) (pers & PER_MASK) /* * Personality of the currently running process. */ #define get_personality (current->personality) /* * Change personality of the currently running process. 
*/ #define set_personality(pers) \ ((current->personality == pers) ? 0 : __set_personality(pers)) /* * Load an execution domain. */ #define get_exec_domain(ep) \ do { \ if (ep != NULL && ep->module != NULL) \ __MOD_INC_USE_COUNT(ep->module); \ } while (0) /* * Unload an execution domain. */ #define put_exec_domain(ep) \ do { \ if (ep != NULL && ep->module != NULL) \ __MOD_DEC_USE_COUNT(ep->module); \ } while (0) #endif /* _LINUX_PERSONALITY_H */ =============================================================== ========= poll.h ============================================== #ifndef _LINUX_POLL_H #define _LINUX_POLL_H #include <asm/poll.h> #ifdef __KERNEL__ #include <linux/wait.h> #include <linux/string.h> #include <linux/mm.h> #include <asm/uaccess.h> #include <linux/time.h> struct poll_table_page; typedef struct poll_table_struct { int error; struct poll_table_page * table; } poll_table; extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { if (p && wait_address) __pollwait(filp, wait_address, p); } static inline void poll_initwait(poll_table* pt) { pt->error = 0; pt->table = NULL; } extern void poll_freewait(poll_table* pt); /* * Scaleable version of the fd_set. */ typedef struct { unsigned long *in, *out, *ex; unsigned long *res_in, *res_out, *res_ex; } fd_set_bits; /* * How many longwords for "nr" bits? */ #define FDS_BITPERLONG (8*sizeof(long)) #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) /* * We do a VERIFY_WRITE here even though we are only reading this time: * we'll write to it eventually.. * * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. 
*/ static inline int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) { nr = FDS_BYTES(nr); if (ufdset) { int error; error = verify_area(VERIFY_WRITE, ufdset, nr); if (!error && __copy_from_user(fdset, ufdset, nr)) error = -EFAULT; return error; } memset(fdset, 0, nr); return 0; } static inline void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) { if (ufdset) __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); } static inline void zero_fd_set(unsigned long nr, unsigned long *fdset) { memset(fdset, 0, FDS_BYTES(nr)); } extern int do_select(int n, fd_set_bits *fds, struct timespec *timeout); #endif /* KERNEL */ #endif /* _LINUX_POLL_H */ =============================================================== ========= slab.h ============================================== /* * linux/mm/slab.h * Written by Mark Hemment, 1996. * ([EMAIL PROTECTED]) */ #if !defined(_LINUX_SLAB_H) #define _LINUX_SLAB_H #if defined(__KERNEL__) typedef struct kmem_cache_s kmem_cache_t; #include <linux/mm.h> #include <linux/cache.h> /* flags for kmem_cache_alloc() */ #define SLAB_NOFS GFP_NOFS #define SLAB_NOIO GFP_NOIO #define SLAB_NOHIGHIO GFP_NOHIGHIO #define SLAB_ATOMIC GFP_ATOMIC #define SLAB_USER GFP_USER #define SLAB_KERNEL GFP_KERNEL #define SLAB_NFS GFP_NFS #define SLAB_DMA GFP_DMA #define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS) #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build * SLAB_DEBUG_SUPPORT. 
*/ #define SLAB_DEBUG_FREE 0x00000100UL /* Peform (expensive) checks on free */ #define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* Poison objects */ #define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ /* flags passed to a constructor func */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ #define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ /* prototypes */ extern void kmem_cache_init(void); extern void kmem_cache_sizes_init(void); extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags); extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *, kmem_cache_t *, unsigned long), void (*)(void *, kmem_cache_t *, unsigned long)); extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); extern void *kmem_cache_alloc(kmem_cache_t *, int); extern void kmem_cache_free(kmem_cache_t *, void *); extern void *kmalloc(size_t, int); extern void kfree(const void *); extern int FASTCALL(kmem_cache_reap(int)); extern int slabinfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data); extern int slabinfo_write_proc(struct file *file, const char *buffer, unsigned long count, void *data); /* System wide caches */ extern kmem_cache_t *vm_area_cachep; extern kmem_cache_t *mm_cachep; extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *dquot_cachep; extern kmem_cache_t *bh_cachep; extern kmem_cache_t *fs_cachep; extern kmem_cache_t 
*sigact_cachep; #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ =============================================================== ========= smp_lock.h ========================================== #ifndef __LINUX_SMPLOCK_H #define __LINUX_SMPLOCK_H #include <linux/config.h> #include <linux/spinlock.h> extern spinlock_t big_kernel_lock; extern inline void lock_kernel(void) { if (!++current->lock_depth) spin_lock(&big_kernel_lock); } extern inline void unlock_kernel(void) { if (!current->lock_depth) spin_unlock(&big_kernel_lock); current->lock_depth--; } #define kernel_locked() mutex_is_locked(&big_kernel_lock) #endif =============================================================== ========= timespec.h ========================================== #ifndef _TIMESPEC_H_ #define _TIMESPEC_H_ #define NANOSEC_PER_SEC (1000000000L) #define USEC_PER_SEC (1000000L) #define timespec_zero(time) { (time).tv_sec = 0; (time).tv_nsec = 0; } #define timespec_add_nsec(result, nanos) do { \ if (((result).tv_nsec += (nanos)) >= NANOSEC_PER_SEC) { \ (result).tv_nsec -= NANOSEC_PER_SEC; \ (result).tv_sec++; \ } \ } while (0) #define timespec_add(result, addend) do { \ (result).tv_nsec += (addend).tv_nsec; \ (result).tv_sec += (addend).tv_sec; \ if ((result).tv_nsec >= NANOSEC_PER_SEC) { \ (result).tv_nsec -= NANOSEC_PER_SEC; \ (result).tv_sec++; \ } \ } while (0) #define timespec_sub(result, subtrahend) do { \ if ((result).tv_nsec >= (subtrahend).tv_nsec) { \ (result).tv_nsec -= (subtrahend).tv_nsec; \ (result).tv_sec -= (subtrahend).tv_sec; \ } else { \ (result).tv_nsec += NANOSEC_PER_SEC; \ (result).tv_nsec -= (subtrahend).tv_nsec; \ (result).tv_sec -= (subtrahend).tv_sec + 1; \ } \ } while (0) #define timespec_set(time, newtime) do { \ (time).tv_sec = (newtime).tv_sec; \ (time).tv_nsec = (newtime).tv_nsec; \ } while (0) #define timespec_cmp(time1, time2) \ (((time1).tv_sec < (time2).tv_sec) || \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec <= (time2).tv_nsec))) #define 
timespec_ge(time1, time2) \ (((time1).tv_sec > (time2).tv_sec) || \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec >= (time2).tv_nsec))) #define timespec_gt(time1, time2) \ (((time1).tv_sec > (time2).tv_sec) || \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec > (time2).tv_nsec))) #define timespec_le(time1, time2) \ (((time1).tv_sec < (time2).tv_sec) || \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec <= (time2).tv_nsec))) #define timespec_lt(time1, time2) \ (((time1).tv_sec < (time2).tv_sec) || \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec < (time2).tv_nsec))) #define timespec_eq(time1,time2) \ (((time1).tv_sec == (time2).tv_sec) && \ ((time1).tv_nsec == (time2).tv_nsec)) #define timespec_min(time1,time2) \ (timespec_le((time1),(time2)) ? (time1) : (time2)) #define timespec_max(time1,time2) \ (timespec_ge((time1),(time2)) ? (time1) : (time2)) #define timespec_ne(time1,time2) \ (((time1).tv_sec != (time2).tv_sec) || \ ((time1).tv_nsec != (time2).tv_nsec)) #define timespec_nonzero(time) \ ((time).tv_nsec || (time).tv_sec) #define timespec_valid(time) \ ((time).tv_sec >= 0 && \ (time).tv_nsec >= 0 && \ (time).tv_nsec < NANOSEC_PER_SEC) #define timespec2micro(time) \ (((time).tv_sec * USEC_PER_SEC) + ((time).tv_nsec / 1000)) #define timespec2nano(time) \ ((((unsigned long long)(time).tv_sec) * NANOSEC_PER_SEC) + ((time).tv_nsec)) #define nano2timespec(nanos, ts) do {\ ts.tv_sec = nanos / NANOSEC_PER_SEC; \ ts.tv_nsec = nanos - ts.tv_sec * NANOSEC_PER_SEC; \ } while (0) #endif /* _TIMESPEC_H_ */ =============================================================== ========= uaccess.h =========================================== /* * BK Id: SCCS/s.uaccess.h 1.8 09/11/01 18:10:06 paulus */ #ifdef __KERNEL__ #ifndef _PPC_UACCESS_H #define _PPC_UACCESS_H #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/errno.h> #include <asm/processor.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 /* * The fs value determines whether 
argument validity checking should be * performed or not. If get_fs() == USER_DS, checking is performed, with * get_fs() == KERNEL_DS, checking is bypassed. * * For historical reasons, these macros are grossly misnamed. */ #define KERNEL_DS ((mm_segment_t) { 0 }) #define USER_DS ((mm_segment_t) { 1 }) #define get_ds() (KERNEL_DS) #define get_fs() (current->thread.fs) #define set_fs(val) (current->thread.fs = (val)) #define segment_eq(a,b) ((a).seg == (b).seg) #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) #define __user_ok(addr,size) (((size) <= TASK_SIZE)&&((addr) <= TASK_SIZE-(size))) #define __access_ok(addr,size) (__kernel_ok || __user_ok((addr),(size))) #define access_ok(type,addr,size) __access_ok((unsigned long)(addr),(size)) extern inline int verify_area(int type, const void * addr, unsigned long size) { return access_ok(type,addr,size) ? 0 : -EFAULT; } /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are * modified, so it is entirely up to the continuation code to figure out * what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, * we don't even have to jump over them. Further, they do not intrude * on our cache or tlb entries. */ struct exception_table_entry { unsigned long insn, fixup; }; /* Returns 0 if exception not found and fixup otherwise. */ extern unsigned long search_exception_table(unsigned long); extern void sort_exception_table(void); /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. * * This gets kind of ugly. We want to return _two_ values in "get_user()" * and yet we don't want to do any pointers, because that is too much * of a performance impact. 
Thus we have a few rather ugly macros here, * and hide all the uglyness from the user. * * The "__xxx" versions of the user access functions are versions that * do not verify the address space, that must have been done previously * with a separate "access_ok()" call (this is used when we do multiple * accesses to the same area of user memory). * * As we use the same address space for kernel and user data on the * PowerPC, we can just do these as direct assignments. (Of course, the * exception handling means that it's no longer "just"...) */ #define get_user(x,ptr) \ __get_user_check((x),(ptr),sizeof(*(ptr))) #define put_user(x,ptr) \ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) #define __get_user(x,ptr) \ __get_user_nocheck((x),(ptr),sizeof(*(ptr))) #define __put_user(x,ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) extern long __put_user_bad(void); #define __put_user_nocheck(x,ptr,size) \ ({ \ long __pu_err; \ __put_user_size((x),(ptr),(size),__pu_err); \ __pu_err; \ }) #define __put_user_check(x,ptr,size) \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) *__pu_addr = (ptr); \ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ __put_user_size((x),__pu_addr,(size),__pu_err); \ __pu_err; \ }) #define __put_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ switch (size) { \ case 1: __put_user_asm(x,ptr,retval,"stb"); break; \ case 2: __put_user_asm(x,ptr,retval,"sth"); break; \ case 4: __put_user_asm(x,ptr,retval,"stw"); break; \ case 8: __put_user_asm2(x,ptr,retval); break; \ default: __put_user_bad(); \ } \ } while (0) struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct *)(x)) /* * We don't tell gcc that we are accessing memory, but this is OK * because we do not write to any memory gcc knows about, so there * are no aliasing issues. 
*/ #define __put_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 2\n" \ " .long 1b,3b\n" \ ".previous" \ : "=r"(err) \ : "r"(x), "b"(addr), "i"(-EFAULT), "0"(err)) #define __put_user_asm2(x, addr, err) \ __asm__ __volatile__( \ "1: stw %1,0(%2)\n" \ "2: stw %1+1,4(%2)\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ " b 3b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 2\n" \ " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ : "=r"(err) \ : "r"(x), "b"(addr), "i"(-EFAULT), "0"(err)) #define __get_user_nocheck(x,ptr,size) \ ({ \ long __gu_err, __gu_val; \ __get_user_size(__gu_val,(ptr),(size),__gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) #define __get_user_check(x,ptr,size) \ ({ \ long __gu_err = -EFAULT, __gu_val = 0; \ const __typeof__(*(ptr)) *__gu_addr = (ptr); \ if (access_ok(VERIFY_READ,__gu_addr,size)) \ __get_user_size(__gu_val,__gu_addr,(size),__gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) extern long __get_user_bad(void); #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ switch (size) { \ case 1: __get_user_asm(x,ptr,retval,"lbz"); break; \ case 2: __get_user_asm(x,ptr,retval,"lhz"); break; \ case 4: __get_user_asm(x,ptr,retval,"lwz"); break; \ case 8: __get_user_asm2(x, ptr, retval); \ default: (x) = __get_user_bad(); \ } \ } while (0) #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ " li %1,0\n" \ " b 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 2\n" \ " .long 1b,3b\n" \ ".previous" \ : "=r"(err), "=r"(x) \ : "b"(addr), "i"(-EFAULT), "0"(err)) #define __get_user_asm2(x, addr, err) \ __asm__ __volatile__( \ "1: lwz %1,0(%2)\n" \ "2: lwz %1+1,4(%2)\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ 
" li %1,0\n" \ " li %1+1,0\n" \ " b 3b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 2\n" \ " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ : "=r"(err), "=&r"(x) \ : "b"(addr), "i"(-EFAULT), "0"(err)) /* more complex routines */ extern int __copy_tofrom_user(void *to, const void *from, unsigned long size); extern inline unsigned long copy_from_user(void *to, const void *from, unsigned long n) { unsigned long over; if (access_ok(VERIFY_READ, from, n)) return __copy_tofrom_user(to, from, n); if ((unsigned long)from < TASK_SIZE) { over = (unsigned long)from + n - TASK_SIZE; return __copy_tofrom_user(to, from, n - over) + over; } return n; } extern inline unsigned long copy_to_user(void *to, const void *from, unsigned long n) { unsigned long over; if (access_ok(VERIFY_WRITE, to, n)) return __copy_tofrom_user(to, from, n); if ((unsigned long)to < TASK_SIZE) { over = (unsigned long)to + n - TASK_SIZE; return __copy_tofrom_user(to, from, n - over) + over; } return n; } #define __copy_from_user(to, from, size) \ __copy_tofrom_user((to), (from), (size)) #define __copy_to_user(to, from, size) \ __copy_tofrom_user((to), (from), (size)) extern unsigned long __clear_user(void *addr, unsigned long size); extern inline unsigned long clear_user(void *addr, unsigned long size) { if (access_ok(VERIFY_WRITE, addr, size)) return __clear_user(addr, size); if ((unsigned long)addr < TASK_SIZE) { unsigned long over = (unsigned long)addr + size - TASK_SIZE; return __clear_user(addr, size - over) + over; } return size; } extern int __strncpy_from_user(char *dst, const char *src, long count); extern inline long strncpy_from_user(char *dst, const char *src, long count) { if (access_ok(VERIFY_READ, src, 1)) return __strncpy_from_user(dst, src, count); return -EFAULT; } /* * Return the size of a string (including the ending 0) * * Return 0 for error */ extern int __strnlen_user(const char *str, long len, unsigned long top); * The `top' parameter to __strnlen_user is to make 
sure that * we can never overflow from the user area into kernel space. */ extern __inline__ int strnlen_user(const char *str, long len) { unsigned long top = __kernel_ok? ~0UL: TASK_SIZE - 1; if ((unsigned long)str > top) return 0; return __strnlen_user(str, len, top); } #define strlen_user(str) strnlen_user((str), 0x7ffffffe) #endif /* __ASSEMBLY__ */ #endif /* _PPC_UACCESS_H */ #endif /* __KERNEL__ */ =============================================================== -- Summary: cross compiler for power PC -O2 optimization moves code outside of use Product: gcc Version: 3.2 Status: UNCONFIRMED Severity: normal Priority: P2 Component: tree-optimization AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: Jeff dot Fellin at rflelect dot com CC: Jeff dot Fellin at rflelect dot com,gcc-bugs at gcc dot gnu dot org GCC build triplet: i686-linux GCC host triplet: i686-linux GCC target triplet: powerpc-linux http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19570