https://gcc.gnu.org/g:58c9ff276f5fad46546494ec6394f040c1d6189e

commit 58c9ff276f5fad46546494ec6394f040c1d6189e
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Thu Oct 3 12:31:37 2024 -0400

    Add support for restricting asm to fpr registers.
    
    2024-10-03  Michael Meissner  <meiss...@linux.ibm.com>
    
            * config/rs6000/vector-pair.h (__VPAIR_SPLAT): Add support for
            restricting asm usage to traditional FPR registers.
            (__VPAIR_UNARY): Likewise.
            (__VPAIR_BINARY): Likewise.
            (__VPAIR_FMA): Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.h | 50 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index c36952003551..e146f6ca17cb 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -88,10 +88,51 @@ typedef union __vpair_union vector_pair_f32_t;
 /* Operations using a vector pair and __asm__operations.  */
 #elif __MMA__ && !__VPAIR_NOP10__
 
+/* When using __asm__, we need to access the second register.  Due to the way
+   VSX registers were formed by combining the traditional floating point
+   registers and Altivec registers, we can't use the output modifier %L<n> to
+   refer to the second register if the VSX register was a traditional Altivec
+   register.  If the value is in VSX registers 34 & 35, %x0 would give 34, but
+   %L0 would give 1, since 'Altivec' registers start at 0.
+
+   If we are using GAS under Linux, we can use %x0+1 to access the second
+   register and use the full VSX register set.
+
+   If this include file is used on non-Linux systems, or with a non-GCC
+   compiler, limit the registers used to the traditional FPR registers so that
+   we can use %L0.  */
+
+#if __VPAIR__USE_FPR__ || !__GNUC__ || (!__linux__ && !__ELF__)
+
+/* Use %0 and %L0 on traditional FPR registers.  */
+#define __VPAIR_SPLAT(R, X, VP_FUNC, VEC)                              \
+  __asm__ ("xxlor %L0,%0,%0"                                           \
+           : "=d" ((R)->__vpair)                                       \
+           : "0" (__builtin_vec_splats ((X))))
+
+#define __VPAIR_UNARY(R, A, VP_FUNC, OPCODE, VEC, VEC_FUNC)            \
+  __asm__ (OPCODE " %0,%1\n\t" OPCODE " %L0,%L1"                       \
+           : "=d" ((R)->__vpair)                                       \
+           : "d" ((A)->__vpair))
+
+#define __VPAIR_BINARY(R, A, B, VP_FUNC, OPCODE, VEC, VEC_FUNC)                \
+  __asm__ (OPCODE " %0,%1,%2\n\t" OPCODE " %L0,%L1,%L2"                        \
+           : "=d" ((R)->__vpair)                                       \
+           : "d" ((A)->__vpair), "d" ((B)->__vpair))  /* %1 = A, %2 = B.  */
+
+/* Note the 'a' form of the fma instructions must be used.  */
+#define __VPAIR_FMA(R, A, B, C, VP_FUNC, OPCODE, VEC, VEC_FUNC)                \
+  __asm__ (OPCODE " %0,%1,%2\n\t" OPCODE " %L0,%L1,%L2"                        \
+           : "=d" ((R)->__vpair)                                       \
+           : "d" ((A)->__vpair), "d" ((B)->__vpair), "0" ((C)->__vpair))
+
+#else
+
+/* Use %x0 and %x0+1 on VSX registers.  */
 #define __VPAIR_SPLAT(R, X, VP_FUNC, VEC)                              \
   __asm__ ("xxlor %x0+1,%x0,%x0"                                       \
-          : "=wa" ((R)->__vpair)                                       \
-          : "0" (__builtin_vec_splats ((X))))
+           : "=wa" ((R)->__vpair)                                      \
+           : "0" (__builtin_vec_splats ((X))))
 
 #define __VPAIR_UNARY(R, A, VP_FUNC, OPCODE, VEC, VEC_FUNC)            \
   __asm__ (OPCODE " %x0,%x1\n\t" OPCODE " %x0+1,%x1+1"                 \
@@ -101,13 +142,14 @@ typedef union __vpair_union       vector_pair_f32_t;
 #define __VPAIR_BINARY(R, A, B, VP_FUNC, OPCODE, VEC, VEC_FUNC)                \
   __asm__ (OPCODE " %x0,%x1\n\t" OPCODE " %x0+1,%x1+1"                 \
            : "=wa" ((R)->__vpair)                                      \
-          : "wa" ((A)->__vpair), "wa" ((B)->__vpair))
+           : "wa" ((A)->__vpair), "wa" ((B)->__vpair))
 
 /* Note the 'a' form of the fma instructions must be used.  */
 #define __VPAIR_FMA(R, A, B, C, VP_FUNC, OPCODE, VEC, VEC_FUNC)                \
   __asm__ (OPCODE " %x0,%x1,%x2\n\t" OPCODE " %x0+1,%x1+1,%x2+1"       \
            : "=wa" ((R)->__vpair)                                      \
-          : "wa" ((A)->__vpair), "wa" ((B)->__vpair), "0" ((C)->__vpair))
+           : "wa" ((A)->__vpair), "wa" ((B)->__vpair), "0" ((C)->__vpair))
+#endif /* Select whether to use %0/%L0 or %x0/%x0+1.  */
 
 #else  /* vpair support on power8/power9.  */

Reply via email to