https://gcc.gnu.org/g:901c7f0836effdb24831f04ec4a5884f33fd76ba
commit 901c7f0836effdb24831f04ec4a5884f33fd76ba Author: Michael Meissner <meiss...@linux.ibm.com> Date: Tue Aug 20 12:52:34 2024 -0400 Add vector pair init and splat. 2024-08-20 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_<vpair_splat_name>): New define_expand. (vpair_splat_<vpair_splat_name>_internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 ++++ gcc/config/rs6000/vector-pair.md | 102 +++++++++++++++++++++++++++++++++- gcc/doc/extend.texi | 9 +++ 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 099f4b6a008..b3eaa842f12 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3934,6 +3934,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. 
+ v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -3971,6 +3975,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); + VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4011,5 +4018,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); + VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 73ae46e6d40..39b419c6814 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,7 +38,9 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN]) + UNSPEC_VPAIR_SMIN + UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_SPLAT]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -98,6 +100,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. 
+(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_<vpair_splat_name>" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = <MODE>mode; + + if (op1 == CONST0_RTX (element_mode)) + { + emit_insn (gen_vpair_zero (op0)); + DONE; + } + + machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode; + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_t i = 0; i < num_elements; i++) + RTVEC_ELT (elements, i) = copy_rtx (op1); + + rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); + emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec)); + DONE; +}) + +;; Inner splat support. Operand1 is the vector splat created above. Allow +;; operand 1 to overlap with the output registers to eliminate one move +;; instruction. 
+(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")] + UNSPEC_VPAIR_SPLAT))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0); + rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16); + rtx op1 = operands[1]; + unsigned op1_regno = reg_or_subregno (op1); + + /* Check if the input is one of the output registers. */ + if (op1_regno == reg_or_subregno (op0_a)) + emit_move_insn (op0_b, op1); + + else if (op1_regno == reg_or_subregno (op0_b)) + emit_move_insn (op0_a, op1); + + else + { + emit_move_insn (op0_a, op1); + emit_move_insn (op0_b, op1); + } + + DONE; +} + [(set_attr "length" "*,8") + (set_attr "type" "vecmove")]) ;; Vector pair unary operations. The last argument in the UNSPEC is a ;; CONST_INT which identifies what the scalar element is. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e3b2090ef18..43893d74415 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -24206,6 +24206,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the The @code{nfms} built-in is a combination of @code{neg} of the @code{fms} built-in. 
+The following built-in function is independent of the type of the +underlying vector: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + The following built-in functions operate on pairs of @code{vector float} values: @@ -24226,6 +24233,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_splat (float); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -24249,6 +24257,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_splat (double); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample