https://gcc.gnu.org/g:8398ef96cc503cffb1447c5b02741e24423ec120
commit r15-3970-g8398ef96cc503cffb1447c5b02741e24423ec120 Author: Victor Do Nascimento <victor.donascime...@arm.com> Date: Fri Jul 5 15:18:32 2024 +0100 autovectorizer: Test autovectorization of different dot-prod modes. Given the novel treatment of the dot product optab as a conversion, we are now able to target different relationships between output modes and input modes. This is made clearer by way of example. Previously, on AArch64, the following loop was vectorizable: uint32_t udot4(int n, uint8_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } while the following was not: uint32_t udot2(int n, uint16_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } Under the new treatment of the dot product optab, they are both now vectorizable. This adds the relevant target-agnostic check to ensure this behavior in the autovectorizer, gated behind the new check_effective_target `vect_dotprod_hisi' as well as a runtime check targeting aarch64. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_dotprod_hisi): New. * gcc.dg/vect/vect-dotprod-conv-optab.c: Likewise. * gcc.target/aarch64/vect-dotprod-twoway-hisi.c: Likewise. 
Diff: --- .../gcc.dg/vect/vect-dotprod-conv-optab.c | 41 ++++++++++++++ .../gcc.target/aarch64/vect-dotprod-twoway-hisi.c | 66 ++++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 9 +++ 3 files changed, 116 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c new file mode 100644 index 000000000000..63e6c95480da --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_dotprod_hisi } */ +/* Ensure that, given the same input datatype, both the two-way and four-way + dot products are autovectorized, with the correct operation then selected + based on the distinct output types. */ +#include <stdint.h> + +uint32_t udot4(int n, uint8_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot4(int n, int8_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +uint32_t udot2(int n, uint16_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot2(int n, int16_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c new file mode 100644 index 000000000000..0490faa2c94b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_dotprod_hisi } */ +/* { dg-options "-static -O3 -ftree-vectorize -fdump-tree-vect-details -save-temps" } */ +/* Ensure runtime 
correctness in the autovectorized two-way dot product operations. */ + +#include <stdint.h> +#include <stdlib.h> +#pragma GCC target "+sme2" + +uint32_t +udot2 (int n, uint16_t* data) __arm_streaming +{ + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t +sdot2 (int n, int16_t* data) __arm_streaming +{ + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int +main () +{ + + uint16_t u_input_nil[] = { [0 ... 3] = 0 }; + uint16_t u_input_min[] = { [0 ... 3] = 1 }; + uint16_t u_input_max[] = { [0 ... 3] = 32767}; + + uint32_t u_nil_dotprod = udot2 (4, u_input_nil); + uint32_t u_min_dotprod = udot2 (4, u_input_min); + uint32_t u_max_dotprod = udot2 (4, u_input_max); + + if (u_nil_dotprod != 0 + || u_min_dotprod != 4 + || u_max_dotprod != 4294705156) + abort (); + + int16_t s_input_nil[] = { [0 ... 3] = 0 }; + int16_t s_input_min[] = { [0 ... 3] = -23170 }; + int16_t s_input_max[] = { [0 ... 
3] = 23170 }; + + int32_t s_nil_dotprod = sdot2 (4, s_input_nil); + int32_t s_min_dotprod = sdot2 (4, s_input_min); + int32_t s_max_dotprod = sdot2 (4, s_input_max); + + if (s_nil_dotprod != 0 + || s_min_dotprod != 2147395600 + || s_max_dotprod != 2147395600) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 46 "vect" } } */ +/* { dg-final { scan-assembler "\[ \t\]udot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]sdot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 05a63c4e9a55..f92f7f1af9c6 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4294,6 +4294,15 @@ proc check_effective_target_vect_int_div { } { return [check_effective_target_aarch64_sve] } +# Return 1 if the target supports two-way dot products on inputs of hi mode +# producing si outputs, 0 otherwise. + +proc check_effective_target_vect_dotprod_hisi { } { + return [check_cached_effective_target_indexed aarch64_sme2 { + expr { [check_effective_target_aarch64_sme2] + }}] +} + # Return 1 if the target supports vectorization of early breaks, # 0 otherwise. #