Hello! The attached patch introduces global ROUND_UP and ROUND_DOWN macros. These come in handy for rounding and aligning various values, as shown by their usage in the config/i386/ files.
In addition to target-independent code, many targets could benefit from these universal macros (e.g. arm and aarch64 can immediately replace their equivalent local definitions), so I propose making these definitions available globally throughout the source. From a quick look, there are a number of places where these macros can be used. And, as witnessed in the attached patch, it is so easy to forget those "-1"s: - offset = (offset + stack_alignment_needed) & -stack_alignment_needed; + offset = ROUND_UP (offset, stack_alignment_needed); [...] - offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed; + offset = ROUND_UP (offset, stack_alignment_needed); 2015-10-02 Uros Bizjak <ubiz...@gmail.com> * system.h (ROUND_UP): New macro definition. (ROUND_DOWN): Ditto. * ggc-page.c (ROUND_UP): Remove local macro definition. (PAGE_ALIGN): Implement using ROUND_UP macro. * config/i386/i386.h (PUSH_ROUNDING): Implement using ROUND_UP macro. * config/i386/i386.c (function_arg_advance_64): Use ROUND_UP macro to align values. (ix86_compute_frame_layout): Ditto. (ix86_expand_prologue): Ditto. (ix86_adjust_stack_and_probe): Use ROUND_DOWN macro to round down values. (expand_set_or_movmem_via_rep): Ditto. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. OK for mainline? Uros.
Index: ggc-page.c =================================================================== --- ggc-page.c (revision 228380) +++ ggc-page.c (working copy) @@ -216,13 +216,9 @@ static const size_t extra_order_size_table[] = { #define ROUND_UP_VALUE(x, f) ((f) - 1 - ((f) - 1 + (x)) % (f)) -/* Compute the smallest multiple of F that is >= X. */ - -#define ROUND_UP(x, f) (CEIL (x, f) * (f)) - /* Round X to next multiple of the page size */ -#define PAGE_ALIGN(x) (((x) + G.pagesize - 1) & ~(G.pagesize - 1)) +#define PAGE_ALIGN(x) ROUND_UP ((x), G.pagesize) /* The Ith entry is the number of objects on a page or order I. */ Index: system.h =================================================================== --- system.h (revision 228380) +++ system.h (working copy) @@ -369,6 +369,12 @@ extern int errno; /* Returns the least number N such that N * Y >= X. */ #define CEIL(x,y) (((x) + (y) - 1) / (y)) +/* This macro rounds x up to the y boundary. */ +#define ROUND_UP(x,y) (((x) + (y) - 1) & ~((y) - 1)) + +/* This macro rounds x down to the y boundary. */ +#define ROUND_DOWN(x,y) ((x) & ~((y) - 1)) + #ifdef HAVE_SYS_WAIT_H #include <sys/wait.h> #endif Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 228380) +++ config/i386/i386.h (working copy) @@ -1596,8 +1596,7 @@ enum reg_class and -8 for 64bit targets, we need to make sure all stack pointer adjustments are in multiple of 4 for 32bit targets and 8 for 64bit targets. */ -#define PUSH_ROUNDING(BYTES) \ - (((BYTES) + UNITS_PER_WORD - 1) & -UNITS_PER_WORD) +#define PUSH_ROUNDING(BYTES) ROUND_UP (BYTES, UNITS_PER_WORD) /* If defined, the maximum amount of space required for outgoing arguments will be computed and placed into the variable `crtl->outgoing_args_size'. 
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 228380) +++ config/i386/i386.c (working copy) @@ -8651,7 +8651,7 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, mac else { int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; - cum->words = (cum->words + align - 1) & ~(align - 1); + cum->words = ROUND_UP (cum->words, align); cum->words += words; return 0; } @@ -11285,7 +11285,7 @@ ix86_compute_frame_layout (struct ix86_frame *fram 16-byte aligned default stack, and thus we don't need to be within the re-aligned local stack frame to save them. */ gcc_assert (INCOMING_STACK_BOUNDARY >= 128); - offset = (offset + 16 - 1) & -16; + offset = ROUND_UP (offset, 16); offset += frame->nsseregs * 16; } frame->sse_reg_save_offset = offset; @@ -11295,7 +11295,7 @@ ix86_compute_frame_layout (struct ix86_frame *fram sure that no value happens to be the same before and after, force the alignment computation below to add a non-zero value. */ if (stack_realign_fp) - offset = (offset + stack_alignment_needed) & -stack_alignment_needed; + offset = ROUND_UP (offset, stack_alignment_needed); /* Va-arg area */ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; @@ -11308,7 +11308,7 @@ ix86_compute_frame_layout (struct ix86_frame *fram || !crtl->is_leaf || cfun->calls_alloca || ix86_current_function_calls_tls_descriptor) - offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed; + offset = ROUND_UP (offset, stack_alignment_needed); /* Frame pointer points here. */ frame->frame_pointer_offset = offset; @@ -11334,7 +11334,7 @@ ix86_compute_frame_layout (struct ix86_frame *fram or using alloca. */ if (!crtl->is_leaf || cfun->calls_alloca || ix86_current_function_calls_tls_descriptor) - offset = (offset + preferred_alignment - 1) & -preferred_alignment; + offset = ROUND_UP (offset, preferred_alignment); /* We've reached end of stack frame. 
*/ frame->stack_pointer_offset = offset; @@ -12050,7 +12050,7 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT s /* Step 1: round SIZE to the previous multiple of the interval. */ - rounded_size = size & -PROBE_INTERVAL; + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); /* Step 2: compute initial and final value of the loop counter. */ @@ -12204,7 +12204,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, /* Step 1: round SIZE to the previous multiple of the interval. */ - rounded_size = size & -PROBE_INTERVAL; + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); /* Step 2: compute initial and final value of the loop counter. */ @@ -12663,7 +12663,7 @@ ix86_expand_prologue (void) pointer is no longer valid. As for the value of sp_offset, see ix86_compute_frame_layout, which we need to match in order to pass verification of stack_pointer_offset at the end. */ - m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes; + m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); m->fs.sp_valid = false; } @@ -24692,8 +24692,8 @@ expand_set_or_movmem_via_rep (rtx destmem, rtx src destexp = gen_rtx_PLUS (Pmode, destptr, countreg); if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) { - rounded_count = (INTVAL (count) - & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); + rounded_count + = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); destmem = shallow_copy_rtx (destmem); set_mem_size (destmem, rounded_count); } @@ -24719,8 +24719,8 @@ expand_set_or_movmem_via_rep (rtx destmem, rtx src srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); if (CONST_INT_P (count)) { - rounded_count = (INTVAL (count) - & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); + rounded_count + = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); srcmem = shallow_copy_rtx (srcmem); set_mem_size (srcmem, rounded_count); }