This final patch adds the clobber high expressions to tls_desc for aarch64. It also adds three tests.
In addition, I tested by taking the gcc torture test suite and making all global variables __thread. I then amended the suite to compile with -fpic, save the .s file, and run at only one given optimization level. I ran this before and after the patch and compared the resulting .s files, ensuring that there were no ASM changes. I discarded the 10% of tests that failed to compile (due to the code in the test now being invalid C). I did this for O0, O2 and O3 on both x86 and aarch64 and observed no difference between the ASM files before and after the patch. Alan. 2017-11-16 Alan Hayward <alan.hayw...@arm.com> gcc/ * config/aarch64/aarch64.md: Add clobber highs to tls_desc. gcc/testsuite/ * gcc.target/aarch64/sve_tls_preserve_1.c: New test. * gcc.target/aarch64/sve_tls_preserve_2.c: New test. * gcc.target/aarch64/sve_tls_preserve_3.c: New test. diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 6a15ff0b61d775cf30189b8503cfa45987701228..1f332b254fe0e37954efbe92982f214100d7046f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -57,7 +57,36 @@ (LR_REGNUM 30) (SP_REGNUM 31) (V0_REGNUM 32) + (V1_REGNUM 33) + (V2_REGNUM 34) + (V3_REGNUM 35) + (V4_REGNUM 36) + (V5_REGNUM 37) + (V6_REGNUM 38) + (V7_REGNUM 39) + (V8_REGNUM 40) + (V9_REGNUM 41) + (V10_REGNUM 42) + (V11_REGNUM 43) + (V12_REGNUM 44) + (V13_REGNUM 45) + (V14_REGNUM 46) (V15_REGNUM 47) + (V16_REGNUM 48) + (V17_REGNUM 49) + (V18_REGNUM 50) + (V19_REGNUM 51) + (V20_REGNUM 52) + (V21_REGNUM 53) + (V22_REGNUM 54) + (V23_REGNUM 55) + (V24_REGNUM 56) + (V25_REGNUM 57) + (V26_REGNUM 58) + (V27_REGNUM 59) + (V28_REGNUM 60) + (V29_REGNUM 61) + (V30_REGNUM 62) (V31_REGNUM 63) (LAST_SAVED_REGNUM 63) (SFP_REGNUM 64) @@ -5745,6 +5774,38 @@ UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) + (clobber_high (reg:TI V0_REGNUM)) + (clobber_high (reg:TI V1_REGNUM)) + (clobber_high (reg:TI V2_REGNUM)) + (clobber_high (reg:TI V3_REGNUM)) + (clobber_high (reg:TI 
V4_REGNUM)) + (clobber_high (reg:TI V5_REGNUM)) + (clobber_high (reg:TI V6_REGNUM)) + (clobber_high (reg:TI V7_REGNUM)) + (clobber_high (reg:TI V8_REGNUM)) + (clobber_high (reg:TI V9_REGNUM)) + (clobber_high (reg:TI V10_REGNUM)) + (clobber_high (reg:TI V11_REGNUM)) + (clobber_high (reg:TI V12_REGNUM)) + (clobber_high (reg:TI V13_REGNUM)) + (clobber_high (reg:TI V14_REGNUM)) + (clobber_high (reg:TI V15_REGNUM)) + (clobber_high (reg:TI V16_REGNUM)) + (clobber_high (reg:TI V17_REGNUM)) + (clobber_high (reg:TI V18_REGNUM)) + (clobber_high (reg:TI V19_REGNUM)) + (clobber_high (reg:TI V20_REGNUM)) + (clobber_high (reg:TI V21_REGNUM)) + (clobber_high (reg:TI V22_REGNUM)) + (clobber_high (reg:TI V23_REGNUM)) + (clobber_high (reg:TI V24_REGNUM)) + (clobber_high (reg:TI V25_REGNUM)) + (clobber_high (reg:TI V26_REGNUM)) + (clobber_high (reg:TI V27_REGNUM)) + (clobber_high (reg:TI V28_REGNUM)) + (clobber_high (reg:TI V29_REGNUM)) + (clobber_high (reg:TI V30_REGNUM)) + (clobber_high (reg:TI V31_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c new file mode 100644 index 0000000000000000000000000000000000000000..5bad829568130181ef1ab386545bd3ee164c322e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve" } */ + +/* Clobber highs do not need to be spilled around tls usage. 
*/ + +typedef float v4si __attribute__ ((vector_size (16))); + +__thread v4si tx; + +v4si foo (v4si a, v4si b, v4si c) +{ + v4si y; + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-not {\tstr\t} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c new file mode 100644 index 0000000000000000000000000000000000000000..69e8829287b8418c28f8c227391c4f8d2186ea63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve -msve-vector-bits=256 -fno-schedule-insns" } */ + +/* Clobber highs must be spilled around tls usage. */ + +typedef float v8si __attribute__ ((vector_size (32))); + +__thread v8si tx; + +v8si foo (v8si a, v8si b, v8si c) +{ + v8si y; + + /* There is nothing stopping the compiler from making the tls call before + loading the input variables off the stack. However, there appears to + be no way in C of enforcing this. Thankfully the compiler doesn't + do this reordering. */ + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+,} 3 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c new file mode 100644 index 0000000000000000000000000000000000000000..b6aa59a3c7393d7e9ca419167d13b624a9ffafcc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve -msve-vector-bits=512 -fno-schedule-insns" } */ + +/* Clobber highs must be spilled around tls usage. 
*/ + +typedef float v16si __attribute__ ((vector_size (64))); + +__thread v16si tx; + +v16si foo (v16si a, v16si b, v16si c) +{ + v16si y; + + /* There is nothing stopping the compiler from making the tls call before + loading the input variables off the stack. However, there appears to + be no way in C of enforcing this. Thankfully the compiler doesn't + do this reordering. */ + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+,} 3 } } */