Hi, the inlining heuristics are nowadays decent. In particular, at -O2 the compiler does the obvious inlinings:
- If the function body is very small (for some measure of small, see -finline-small-functions) - static functions called once (-finline-functions-called-once) Where GCC may need help is for function called with constant arguments, and the inlining would allow the deletion of untaken branches. Otherwise, in lieu of profiling data suggesting otherwise, keeping the code size smaller by avoiding inlining is probably the smart thing to do. The attached patch does this for libgfortran, that is, removes the inline attribute for static functions. The patch reduces the size of the following object files as follows: Before: text data bss dec hex filename 781 0 0 781 30d ../../trunk/objdir-git/x86_64-unknown-linux-gnu/libgfortran/cpu_time.o text data bss dec hex filename 679 0 0 679 2a7 ../../trunk/objdir-git/x86_64-unknown-linux-gnu/libgfortran/system_clock.o After: text data bss dec hex filename 660 0 0 660 294 ../../trunk/objdir-git/x86_64-unknown-linux-gnu/libgfortran/cpu_time.o text data bss dec hex filename 631 0 0 631 277 ../../trunk/objdir-git/x86_64-unknown-linux-gnu/libgfortran/system_clock.o For the other affected object files there is no change, suggesting that while the inline attributes did no harm, they did no good either. A system_clock benchmark program showed no change due to the un-inlining of gf_gettime_mono. For CPU_TIME, that results in a proper syscall (as opposed to SYSTEM_CLOCK/clock_gettime which is available as a VDSO on my system) so the overhead of that would overshadow whatever differences inlining might make, so I didn't test that. There was also an inline function (memset4) which was copy-pasted both in transfer.c and write.c; I moved it to io.h after first verifying that removing the inline attribute still caused the compiler to inline it. Committed as obvious to trunk. 2011-11-10 Janne Blomqvist <j...@gcc.gnu.org> * intrinsics/cpu_time.c (__cpu_time_1): Don't force inlining. * intrinsics/random.c (rnumber_4): Remove inline attribute. (rnumber_8, rnumber_10, rnumber_16): Likewise. * intrinsics/system_clock.c (gf_gettime_mono): Likewise. * intrinsics/time_1.h (ATTRIBUTE_ALWAYS_INLINE): Remove macro. (gf_cputime): Add inline attribute for MingW version. * io/format.c (format_hash): Remove inline attribute. * io/io.h (memset4): Inline function from transfer.c and write.c moved here. * io/transfer.c (min_off): Remove inline attribute. (memset4): Move to io.h. * io/write.c (memset4): Likewise. (memcpy4): Remove inline attribute. * io/write_float.def (calculate_exp): Likewise. -- Janne Blomqvist
diff --git a/libgfortran/intrinsics/cpu_time.c b/libgfortran/intrinsics/cpu_time.c index 619f8d2..94636c4 100644 --- a/libgfortran/intrinsics/cpu_time.c +++ b/libgfortran/intrinsics/cpu_time.c @@ -26,9 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include "time_1.h" -static inline void __cpu_time_1 (long *, long *) ATTRIBUTE_ALWAYS_INLINE; - -static inline void +static void __cpu_time_1 (long *sec, long *usec) { long user_sec, user_usec, system_sec, system_usec; diff --git a/libgfortran/intrinsics/random.c b/libgfortran/intrinsics/random.c index 8c16b85..35576b8 100644 --- a/libgfortran/intrinsics/random.c +++ b/libgfortran/intrinsics/random.c @@ -74,7 +74,7 @@ static __gthread_mutex_t random_lock; correct offset. */ -static inline void +static void rnumber_4 (GFC_REAL_4 *f, GFC_UINTEGER_4 v) { GFC_UINTEGER_4 mask; @@ -89,7 +89,7 @@ rnumber_4 (GFC_REAL_4 *f, GFC_UINTEGER_4 v) *f = (GFC_REAL_4) v * GFC_REAL_4_LITERAL(0x1.p-32); } -static inline void +static void rnumber_8 (GFC_REAL_8 *f, GFC_UINTEGER_8 v) { GFC_UINTEGER_8 mask; @@ -106,7 +106,7 @@ rnumber_8 (GFC_REAL_8 *f, GFC_UINTEGER_8 v) #ifdef HAVE_GFC_REAL_10 -static inline void +static void rnumber_10 (GFC_REAL_10 *f, GFC_UINTEGER_8 v) { GFC_UINTEGER_8 mask; @@ -126,7 +126,7 @@ rnumber_10 (GFC_REAL_10 *f, GFC_UINTEGER_8 v) /* For REAL(KIND=16), we only need to mask off the lower bits. */ -static inline void +static void rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2) { GFC_UINTEGER_8 mask; diff --git a/libgfortran/intrinsics/system_clock.c b/libgfortran/intrinsics/system_clock.c index f4bac07..6385c4f 100644 --- a/libgfortran/intrinsics/system_clock.c +++ b/libgfortran/intrinsics/system_clock.c @@ -75,7 +75,7 @@ static int weak_gettime (clockid_t, struct timespec *) Return value: 0 for success, -1 for error. In case of error, errno is set. */ -static inline int +static int gf_gettime_mono (time_t * secs, long * nanosecs) { int err; diff --git a/libgfortran/intrinsics/time_1.h b/libgfortran/intrinsics/time_1.h index 327f7cf..aaca56a 100644 --- a/libgfortran/intrinsics/time_1.h +++ b/libgfortran/intrinsics/time_1.h @@ -97,14 +97,6 @@ localtime_r (const time_t * timep, struct tm * result) #endif -#if defined (__GNUC__) && (__GNUC__ >= 3) -# define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((__always_inline__)) -#else -# define ATTRIBUTE_ALWAYS_INLINE -#endif - -static inline int gf_cputime (long *, long *, long *, long *) ATTRIBUTE_ALWAYS_INLINE; - /* Helper function for the actual implementation of the DTIME, ETIME and CPU_TIME intrinsics. Returns 0 for success or -1 if no CPU time could be computed. */ @@ -114,7 +106,7 @@ static inline int gf_cputime (long *, long *, long *, long *) ATTRIBUTE_ALWAYS_I #define WIN32_LEAN_AND_MEAN #include <windows.h> -static int +static inline int gf_cputime (long *user_sec, long *user_usec, long *system_sec, long *system_usec) { union { diff --git a/libgfortran/io/format.c b/libgfortran/io/format.c index 518dc80..1711a75 100644 --- a/libgfortran/io/format.c +++ b/libgfortran/io/format.c @@ -116,8 +116,8 @@ reset_fnode_counters (st_parameter_dt *dtp) /* A simple hashing function to generate an index into the hash table. */ -static inline -uint32_t format_hash (st_parameter_dt *dtp) +static uint32_t +format_hash (st_parameter_dt *dtp) { char *key; gfc_charlen_type key_len; diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h index 5270fd7..06364e1 100644 --- a/libgfortran/io/io.h +++ b/libgfortran/io/io.h @@ -801,5 +801,14 @@ dec_waiting_unlocked (gfc_unit *u) #endif } + +static inline void +memset4 (gfc_char4_t *p, gfc_char4_t c, int k) +{ + int j; + for (j = 0; j < k; j++) + *p++ = c; +} + #endif diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c index 062f80e..976102f 100644 --- a/libgfortran/io/transfer.c +++ b/libgfortran/io/transfer.c @@ -2877,7 +2877,7 @@ next_record_r_unf (st_parameter_dt *dtp, int complete_record) } -static inline gfc_offset +static gfc_offset min_off (gfc_offset a, gfc_offset b) { return (a < b ? a : b); @@ -3136,13 +3136,6 @@ sset (stream * s, int c, ssize_t nbyte) return nbyte - bytes_left; } -static inline void -memset4 (gfc_char4_t *p, gfc_char4_t c, int k) -{ - int j; - for (j = 0; j < k; j++) - *p++ = c; -} /* Position to the next record in write mode. */ diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c index 95eec84..8be3a5a 100644 --- a/libgfortran/io/write.c +++ b/libgfortran/io/write.c @@ -41,15 +41,7 @@ typedef unsigned char uchar; /* Helper functions for character(kind=4) internal units. These are needed by write_float.def. */ -static inline void -memset4 (gfc_char4_t *p, gfc_char4_t c, int k) -{ - int j; - for (j = 0; j < k; j++) - *p++ = c; -} - -static inline void +static void memcpy4 (gfc_char4_t *dest, const char *source, int k) { int j; diff --git a/libgfortran/io/write_float.def b/libgfortran/io/write_float.def index 7ab70d2..78f09b2 100644 --- a/libgfortran/io/write_float.def +++ b/libgfortran/io/write_float.def @@ -774,7 +774,7 @@ write_infnan (st_parameter_dt *dtp, const fnode *f, int isnan_flag, int sign_bit /* Returns the value of 10**d. */ #define CALCULATE_EXP(x) \ -inline static GFC_REAL_ ## x \ +static GFC_REAL_ ## x \ calculate_exp_ ## x (int d)\ {\ int i;\