This change drops the forced alignment to 8 bytes when the requested
alignment is higher than 8: before the patch, -falign-functions=9 generated
.p2align 4,,8
.p2align 3
which means: "align to 16 if the skip is 8 bytes or less; else align to 8".
After this change, ".p2align 3" is not emitted.
For many generations now, x86 CPUs have had cachelines of at least 32, and
usually 64, bytes. Aligning to a cacheline (e.g. -falign-functions=32) to
avoid needing two fetches to decode the next insn makes sense; aligning to
8 bytes within a cacheline does not. It simply wastes bytes.
I ultimately want to be able to do something like -falign-functions=64,8:
I want to align functions to 64 bytes, but only if that generates padding
of less than 8 bytes - otherwise I want *no alignment at all*.
The forced ".p2align 3" interferes with that intention.
Simple testing on a SandyBridge CPU did not reveal any performance difference
between a tight loop which starts at byte 7 inside a 64-byte cacheline
and the same loop starting at byte 8.
2016-08-12 Denys Vlasenko <[email protected]>
* config/i386/freebsd.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Remove "If N
is large, do at least 8 byte alignment" code.
* config/i386/gnu-user.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
* config/i386/iamcu.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
* config/i386/openbsdelf.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
* config/i386/x86-64.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
Index: gcc/config/i386/freebsd.h
===================================================================
--- gcc/config/i386/freebsd.h (revision 239390)
+++ gcc/config/i386/freebsd.h (working copy)
@@ -92,25 +92,17 @@ along with GCC; see the file COPYING3. If not see
/* A C statement to output to the stdio stream FILE an assembler
command to advance the location counter to a multiple of 1<<LOG
- bytes if it is within MAX_SKIP bytes.
+ bytes if it is within MAX_SKIP bytes. */
- This is used to align code labels according to Intel recommendations. */
-
#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
#undef ASM_OUTPUT_MAX_SKIP_ALIGN
#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
do { \
if ((LOG) != 0) { \
- if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
- else { \
+ if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
- /* Make sure that we have at least 8 byte alignment if > 8 byte \
- alignment is preferred. */ \
- if ((LOG) > 3 \
- && (1 << (LOG)) > ((MAX_SKIP) + 1) \
- && (MAX_SKIP) >= 7) \
- fputs ("\t.p2align 3\n", (FILE)); \
- }
\
} \
} while (0)
#endif
Index: gcc/config/i386/gnu-user.h
===================================================================
--- gcc/config/i386/gnu-user.h (revision 239390)
+++ gcc/config/i386/gnu-user.h (working copy)
@@ -94,24 +94,16 @@ along with GCC; see the file COPYING3. If not see
/* A C statement to output to the stdio stream FILE an assembler
command to advance the location counter to a multiple of 1<<LOG
- bytes if it is within MAX_SKIP bytes.
+ bytes if it is within MAX_SKIP bytes. */
- This is used to align code labels according to Intel recommendations. */
-
#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
do { \
if ((LOG) != 0) { \
- if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
- else { \
+ if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
- /* Make sure that we have at least 8 byte alignment if > 8 byte \
- alignment is preferred. */ \
- if ((LOG) > 3 \
- && (1 << (LOG)) > ((MAX_SKIP) + 1) \
- && (MAX_SKIP) >= 7) \
- fputs ("\t.p2align 3\n", (FILE)); \
- }
\
} \
} while (0)
#endif
Index: gcc/config/i386/iamcu.h
===================================================================
--- gcc/config/i386/iamcu.h (revision 239390)
+++ gcc/config/i386/iamcu.h (working copy)
@@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect
/* A C statement to output to the stdio stream FILE an assembler
command to advance the location counter to a multiple of 1<<LOG
- bytes if it is within MAX_SKIP bytes.
+ bytes if it is within MAX_SKIP bytes. */
- This is used to align code labels according to Intel recommendations. */
-
#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
do { \
if ((LOG) != 0) { \
- if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
- else { \
+ if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
- /* Make sure that we have at least 8 byte alignment if > 8 byte \
- alignment is preferred. */ \
- if ((LOG) > 3 \
- && (1 << (LOG)) > ((MAX_SKIP) + 1) \
- && (MAX_SKIP) >= 7) \
- fputs ("\t.p2align 3\n", (FILE)); \
- }
\
} \
} while (0)
Index: gcc/config/i386/openbsdelf.h
===================================================================
--- gcc/config/i386/openbsdelf.h (revision 239390)
+++ gcc/config/i386/openbsdelf.h (working copy)
@@ -63,24 +63,16 @@ along with GCC; see the file COPYING3. If not see
/* A C statement to output to the stdio stream FILE an assembler
command to advance the location counter to a multiple of 1<<LOG
- bytes if it is within MAX_SKIP bytes.
+ bytes if it is within MAX_SKIP bytes. */
- This is used to align code labels according to Intel recommendations. */
-
#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
do { \
if ((LOG) != 0) { \
- if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
- else { \
+ if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
- /* Make sure that we have at least 8 byte alignment if > 8 byte \
- alignment is preferred. */ \
- if ((LOG) > 3 \
- && (1 << (LOG)) > ((MAX_SKIP) + 1) \
- && (MAX_SKIP) >= 7) \
- fputs ("\t.p2align 3\n", (FILE)); \
- }
\
} \
} while (0)
#endif
Index: gcc/config/i386/x86-64.h
===================================================================
--- gcc/config/i386/x86-64.h (revision 239390)
+++ gcc/config/i386/x86-64.h (working copy)
@@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect
#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
do { \
if ((LOG) != 0) { \
- if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
- else { \
+ if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
- /* Make sure that we have at least 8 byte alignment if > 8 byte \
- alignment is preferred. */ \
- if ((LOG) > 3 \
- && (1 << (LOG)) > ((MAX_SKIP) + 1) \
- && (MAX_SKIP) >= 7) \
- fputs ("\t.p2align 3\n", (FILE)); \
- }
\
} \
} while (0)
#undef ASM_OUTPUT_MAX_SKIP_PAD