PowerPC ISA 3.0 adds new instructions (LXSIHZX, LXSIBZX, STXSIHX, and STXSIBX) that allow you to load and zero extend byte and half word values from memory and to store them back.
This patch is similar in spirit to the patch I wrote years ago for power7 that generates LFIWAX, LFIWZX, and STFIWX when loading up 32-bit integers to convert to floating point, and converting floating point to 32-bit integers. At some point it would be nice to allow various small integers directly into the floating/vector registers, but I suspect that will take some amount of effort to implement and tune. So this patch adds support to avoid using direct move when converting between small integers and floating point. If you are curious, out of the 29 SPEC 2006 CPU benchmarks, there are 8 benchmarks (perlbench, cactusADM, gobmk, povray, h264ref, omnetpp, wrf, and sphinx3) that load small integers from memory and convert them to floating point. There are 3 benchmarks (cactusADM, povray, and wrf) that convert floating point to small integers and store the result. I have done a bootstrap and make check on a power8 little endian system and there were no regressions. Are these patches ok to check into the trunk, and after a burn-in period, check them into the GCC 6.2 branch? [gcc] 2016-06-23 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/vsx.md (UNSPEC_P9_MEMORY): New unspec to support loading and storing byte/half-word values in the vector registers. (vsx_sign_extend_hi_<mode>): Enable the generator function. (p9_lxsi<wd>zx): New insns to load zero-extended bytes and half-words on ISA 3.0 to the vector registers. (p9_stxsi<wd>x): New insns to store zero-extended bytes and half-words on ISA 3.0 from the vector registers. * config/rs6000/rs6000.md (FP_ISA3): New iterator to optimize converting char/half-word items to floating point on ISA 3.0. (float<QHI:mode><FP_ISA3:mode>2): On ISA 3.0 generate the lxsihzx and lxsibzx instructions if we are converting an 8-bit or 16-bit item from memory to floating point. (float<QHI:mode><FP_ISA3:mode>2_internal): Likewise. (floatuns<QHI:mode><FP_ISA3:mode>2): Likewise. 
(floatuns<QHI:mode><FP_ISA3:mode>2_internal): Likewise. (fix_trunc<SFDF:mode><QHI:mode>2): On ISA 3.0 generate the stxsihx and stxsibx instructions to store floating point values converted to 8-bit or 16-bit integers. (fixuns_trunc<SFDF:mode><QHI:mode>2): Likewise. [gcc/testsuite] 2016-06-23 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/p9-fpcvt-1.c: New test to test ISA 3.0 load byte/half-word to vector registers and store byte/half-word from vector register instructions. * gcc.target/powerpc/p9-fpcvt-2.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 237716) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -293,6 +293,7 @@ (define_c_enum "unspec" UNSPEC_VSX_XVCVDPSXDS UNSPEC_VSX_XVCVDPUXDS UNSPEC_VSX_SIGN_EXTEND + UNSPEC_P9_MEMORY ]) ;; VSX moves @@ -2705,7 +2706,7 @@ (define_insn "vsx_sign_extend_qi_<mode>" "vextsb2<wd> %0,%1" [(set_attr "type" "vecsimple")]) -(define_insn "*vsx_sign_extend_hi_<mode>" +(define_insn "vsx_sign_extend_hi_<mode>" [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") (unspec:VSINT_84 [(match_operand:V8HI 1 "vsx_register_operand" "v")] @@ -2721,3 +2722,24 @@ (define_insn "*vsx_sign_extend_si_v2di" "TARGET_P9_VECTOR" "vextsw2d %0,%1" [(set_attr "type" "vecsimple")]) + + +;; ISA 3.0 memory operations +(define_insn "p9_lxsi<wd>zx" + [(set (match_operand:DI 0 "vsx_register_operand" "=wi") + (unspec:DI [(zero_extend:DI + (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))] + UNSPEC_P9_MEMORY))] + "TARGET_P9_VECTOR" + "lxsi<wd>zx %x0,%y1" + [(set_attr "type" "fpload")]) + +(define_insn "p9_stxsi<wd>x" + [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z") + (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")] + UNSPEC_P9_MEMORY))] + "TARGET_P9_VECTOR" + "@ + mfvsrd %0,%x1 + stxsi<wd>x %x1,%y0" + [(set_attr "type" "mffgpr,fpstore")]) Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 237716) +++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy) @@ -506,6 +506,12 @@ (define_mode_iterator FLOAT128 [(KF "TAR (IF "TARGET_FLOAT128") (TF "TARGET_LONG_DOUBLE_128")]) +; Iterator for ISA 3.0 supported floating point types +(define_mode_iterator FP_ISA3 [SF + DF + 
(KF "FLOAT128_IEEE_P (KFmode)") + (TF "FLOAT128_IEEE_P (TFmode)")]) + ; SF/DF suffix for traditional floating instructions (define_mode_attr Ftrad [(SF "s") (DF "")]) @@ -872,7 +878,6 @@ (define_insn_and_split "*zero_extendsi<m (set_attr "dot" "yes") (set_attr "length" "4,8")]) - (define_insn "extendqi<mode>2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r") (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -5188,6 +5193,107 @@ (define_insn_and_split "*floatunssidf2_i [(set_attr "length" "20") (set_attr "type" "fp")]) +;; ISA 3.0 adds instructions lxsi[bh]zx to directly load QImode and HImode to +;; vector registers. At the moment, QI/HImode are not allowed in floating +;; point or vector registers, so we use UNSPEC's to use the load byte and +;; half-word instructions. + +(define_expand "float<QHI:mode><FP_ISA3:mode>2" + [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "") + (float:FP_ISA3 + (match_operand:QHI 1 "input_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (match_scratch:DI 3 ""))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" +{ + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); +}) + +(define_insn_and_split "*float<QHI:mode><FP_ISA3:mode>2_internal" + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>") + (float:FP_ISA3 + (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z"))) + (clobber (match_scratch:DI 2 "=wi,v")) + (clobber (match_scratch:DI 3 "=r,X"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_UPPER_REGS_DI" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx result = operands[0]; + rtx input = operands[1]; + rtx di = operands[2]; + + if (!MEM_P (input)) + { + rtx tmp = operands[3]; + emit_insn (gen_extend<QHI:mode>di2 (tmp, input)); + emit_move_insn (di, tmp); + } + else + { + machine_mode vmode; + rtx di_vector; + + emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input)); + + if (<MODE>mode == 
QImode) + vmode = V16QImode; + else if (<MODE>mode == HImode) + vmode = V8HImode; + else + gcc_unreachable (); + + di_vector = gen_rtx_REG (vmode, REGNO (di)); + emit_insn (gen_vsx_sign_extend_<QHI:mode>_di (di, di_vector)); + } + + emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); + DONE; +}) + +(define_expand "floatuns<QHI:mode><FP_ISA3:mode>2" + [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "") + (unsigned_float:FP_ISA3 + (match_operand:QHI 1 "input_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (match_scratch:DI 3 ""))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" +{ + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); +}) + +(define_insn_and_split "*floatuns<QHI:mode><FP_ISA3:mode>2_internal" + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>") + (unsigned_float:FP_ISA3 + (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z"))) + (clobber (match_scratch:DI 2 "=wi,wi")) + (clobber (match_scratch:DI 3 "=r,X"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx result = operands[0]; + rtx input = operands[1]; + rtx di = operands[2]; + rtx tmp = operands[3]; + + if (!MEM_P (input)) + { + emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input)); + emit_move_insn (di, tmp); + } + else + emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input)); + + emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); + DONE; +}) + (define_expand "fix_trunc<mode>si2" [(set (match_operand:SI 0 "gpc_reg_operand" "") (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))] @@ -5296,6 +5402,23 @@ (define_insn "*fix_trunc<mode>di2_fctidz xscvdpsxds %x0,%x1" [(set_attr "type" "fp")]) +(define_expand "fix_trunc<SFDF:mode><QHI:mode>2" + [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" "")) + (use (match_operand:SFDF 1 "vsx_register_operand" ""))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" +{ + rtx op0 = 
operands[0]; + rtx op1 = operands[1]; + rtx di_tmp = gen_reg_rtx (DImode); + + if (MEM_P (op0)) + op0 = rs6000_address_for_fpconvert (op0); + + emit_insn (gen_fctiwz_<SFDF:mode> (di_tmp, op1)); + emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp)); + DONE; +}) + (define_expand "fixuns_trunc<mode>si2" [(set (match_operand:SI 0 "gpc_reg_operand" "") (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))] @@ -5368,6 +5491,23 @@ (define_insn "*fixuns_trunc<mode>di2_fct xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) +(define_expand "fixuns_trunc<SFDF:mode><QHI:mode>2" + [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" "")) + (use (match_operand:SFDF 1 "vsx_register_operand" ""))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx di_tmp = gen_reg_rtx (DImode); + + if (MEM_P (op0)) + op0 = rs6000_address_for_fpconvert (op0); + + emit_insn (gen_fctiwuz_<SFDF:mode> (di_tmp, op1)); + emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp)); + DONE; +}) + ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ; rather than (set (subreg:SI (reg)) (fix:SI ...)) ; because the first makes it clear that operand 0 is not live Index: gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c (.../gcc/testsuite/gcc.target/powerpc) (revision 237749) @@ -0,0 +1,17 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +void sc (signed char *p, double x) { *p = x; } +void uc (unsigned char *p, double x) { *p = x; } +void ss (signed short *p, double 
x) { *p = x; } +void us (unsigned short *p, double x) { *p = x; } + +/* { dg-final { scan-assembler "stxsibx" } } */ +/* { dg-final { scan-assembler "stxsihx" } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrwz" } } */ +/* { dg-final { scan-assembler-not "mtvsrd" } } */ +/* { dg-final { scan-assembler-not "mtvsrwa" } } */ +/* { dg-final { scan-assembler-not "mtvsrwz" } } */ Index: gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c (.../gcc/testsuite/gcc.target/powerpc) (revision 237749) @@ -0,0 +1,19 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +double sc (signed char *p) { return (double)*p; } +double uc (unsigned char *p) { return (double)*p; } +double ss (signed short *p) { return (double)*p; } +double us (unsigned short *p) { return (double)*p; } + +/* { dg-final { scan-assembler "lxsibzx" } } */ +/* { dg-final { scan-assembler "lxsihzx" } } */ +/* { dg-final { scan-assembler "vextsb2d" } } */ +/* { dg-final { scan-assembler "vextsh2d" } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrwz" } } */ +/* { dg-final { scan-assembler-not "mtvsrd" } } */ +/* { dg-final { scan-assembler-not "mtvsrwa" } } */ +/* { dg-final { scan-assembler-not "mtvsrwz" } } */