Hi, Gentle ping on this patch: https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html
Thanks Gui Haochen 在 2024/6/20 15:01, HAO CHEN GUI 写道: > Hi, > Gently ping it. > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653180.html > > Thanks > Gui Haochen > > 在 2024/5/31 11:25, HAO CHEN GUI 写道: >> Hi, >> This patch optimizes vector construction with two vector doubleword loads. >> It generates an optimal insn sequence as "xxlor" has lower latency than >> "mtvsrdd" on Power10. >> >> Compared with previous version, the main change is to use "isa" attribute >> to guard "lxsd" and "lxsdx". >> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653103.html >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no >> regressions. OK for the trunk? >> >> Thanks >> Gui Haochen >> >> ChangeLog >> rs6000: Optimize vector construction with two vector doubleword loads >> >> When constructing a vector by two doublewords from memory, originally it >> does >> ld 10,0(3) >> ld 9,0(4) >> mtvsrdd 34,9,10 >> >> An optimal sequence on Power10 should be >> lxsd 0,0(4) >> lxvrdx 1,0,3 >> xxlor 34,1,32 >> >> This patch does this optimization by insn combine and split. >> >> gcc/ >> PR target/103568 >> * config/rs6000/vsx.md (vsx_ld_lowpart_zero_<mode>): New insn >> pattern. >> (vsx_ld_highpart_zero_<mode>): New insn pattern. >> (vsx_concat_mem_<mode>): New insn_and_split pattern. >> >> gcc/testsuite/ >> PR target/103568 >> * gcc.target/powerpc/pr103568.c: New test. 
>> >> patch.diff >> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md >> index f135fa079bd..f9a2a260e89 100644 >> --- a/gcc/config/rs6000/vsx.md >> +++ b/gcc/config/rs6000/vsx.md >> @@ -1395,6 +1395,27 @@ (define_insn "vsx_ld_elemrev_v2di" >> "lxvd2x %x0,%y1" >> [(set_attr "type" "vecload")]) >> >> +(define_insn "vsx_ld_lowpart_zero_<mode>" >> + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") >> + (vec_concat:VSX_D >> + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") >> + (match_operand:<VEC_base> 2 "zero_constant" "j,j")))] >> + "" >> + "@ >> + lxsd %0,%1 >> + lxsdx %x0,%y1" >> + [(set_attr "type" "vecload,vecload") >> + (set_attr "isa" "p9v,p7v")]) >> + >> +(define_insn "vsx_ld_highpart_zero_<mode>" >> + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") >> + (vec_concat:VSX_D >> + (match_operand:<VEC_base> 1 "zero_constant" "j") >> + (match_operand:<VEC_base> 2 "memory_operand" "Z")))] >> + "TARGET_POWER10" >> + "lxvrdx %x0,%y2" >> + [(set_attr "type" "vecload")]) >> + >> (define_insn "vsx_ld_elemrev_v1ti" >> [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") >> (vec_select:V1TI >> @@ -3063,6 +3084,26 @@ (define_insn "vsx_concat_<mode>" >> } >> [(set_attr "type" "vecperm,vecmove")]) >> >> +(define_insn_and_split "vsx_concat_mem_<mode>" >> + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=v,wa") >> + (vec_concat:VSX_D >> + (match_operand:<VEC_base> 1 "memory_operand" "wY,Z") >> + (match_operand:<VEC_base> 2 "memory_operand" "Z,Z")))] >> + "TARGET_POWER10 && can_create_pseudo_p ()" >> + "#" >> + "&& 1" >> + [(const_int 0)] >> +{ >> + rtx tmp1 = gen_reg_rtx (<MODE>mode); >> + rtx tmp2 = gen_reg_rtx (<MODE>mode); >> + emit_insn (gen_vsx_ld_highpart_zero_<mode> (tmp1, CONST0_RTX >> (<VEC_base>mode), >> + operands[1])); >> + emit_insn (gen_vsx_ld_lowpart_zero_<mode> (tmp2, operands[2], >> + CONST0_RTX (<VEC_base>mode))); >> + emit_insn (gen_ior<mode>3 (operands[0], tmp1, tmp2)); >> + DONE; >> +}) >> + >> ;; 
Combiner patterns to allow creating XXPERMDI's to access either double >> ;; word element in a vector register. >> (define_insn "*vsx_concat_<mode>_1" >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103568.c >> b/gcc/testsuite/gcc.target/powerpc/pr103568.c >> new file mode 100644 >> index 00000000000..b2a06fb2162 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr103568.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ >> + >> +vector double test (double *a, double *b) >> +{ >> + return (vector double) {*a, *b}; >> +} >> + >> +vector long long test1 (long long *a, long long *b) >> +{ >> + return (vector long long) {*a, *b}; >> +} >> + >> +/* { dg-final { scan-assembler-times {\mlxsd} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mxxlor\M} 2 } } */ >> +