Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving the upper 64 bits of
the destination XMM register. Only an SSE register source operand is allowed.
PR target/89021
* config/i386/sse.md (UNSPEC_CVTPI2PS): New.
(sse_cvtpi2ps): Renamed to ...
(*mmx_cvtpi2ps): This. Disabled for TARGET_MMX_WITH_SSE.
(sse_cvtpi2ps): New.
(mmx_cvtpi2ps_sse): Likewise.
---
gcc/config/i386/sse.md | 83 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4321c5c46db..4503d393dc9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18,6 +18,9 @@
;; <http://www.gnu.org/licenses/>.
(define_c_enum "unspec" [
+ ;; MMX with SSE
+ UNSPEC_CVTPI2PS
+
;; SSE
UNSPEC_MOVNT
@@ -4655,14 +4658,90 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_cvtpi2ps"
+(define_expand "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand")))
+ (match_operand:V4SF 1 "register_operand")
+ (const_int 3)))] ; Mask 3: the low two lanes come from the conversion.
+ "TARGET_SSE"
+{
+ if (TARGET_MMX_WITH_SSE) /* Emit the SSE emulation pattern instead. */
+ {
+ rtx op2 = force_reg (V2SImode, operands[2]); /* Register source only. */
+ rtx op3 = gen_reg_rtx (V4SFmode); /* Receives the cvtdq2ps result. */
+ rtx op4 = gen_reg_rtx (V4SFmode); /* Input tied to op3 by constraint "3". */
+ rtx insn = gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2,
+ op3, op4);
+ emit_insn (insn);
+ DONE;
+ }
+}) ; Otherwise the vec_merge pattern above is emitted unchanged.
+
+(define_insn_and_split "mmx_cvtpi2ps_sse"
+ [(set (match_operand:V4SF 0 "register_operand" "=Yx,Yy")
+ (unspec:V4SF [(match_operand:V2SI 2 "register_operand" "Yx,Yy")
+ (match_operand:V4SF 1 "register_operand" "0,Yy")]
+ UNSPEC_CVTPI2PS))
+ (set (match_operand:V4SF 3 "register_operand" "=Yx,Yy")
+ (unspec:V4SF [(match_operand:V4SF 4 "register_operand" "3,3")]
+ UNSPEC_CVTPI2PS))]
+ "TARGET_MMX_WITH_SSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); /* Same regno, V4SI view. */
+ /* Generate SSE2 cvtdq2ps from the V4SI view of operands[2] into operands[3]. */
+ rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+ emit_insn (insn);
+
+ /* Merge the low half of operands[3] with the high half of operands[1]. */
+ rtx mask, op1;
+ if (TARGET_AVX)
+ {
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]); /* 0-3: op3, 4-7: op1. */
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ }
+ else
+ {
+ /* NB: SSE can only concatenate OP0 and OP3 to OP0; the halves end up swapped. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+ op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]); /* 0-3: op0, 4-7: op3. */
+ op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+ insn = gen_rtx_SET (operands[0], op2);
+ emit_insn (insn);
+
+ /* Swap bits 0:63 with bits 64:127 so the converted lanes end up low. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+ rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ }
+ emit_insn (insn); /* Emits the last insn built by whichever branch ran. */
+ DONE;
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")]) ; NOTE(review): operand 4 is an input that is never initialized -- confirm intended.
+
+(define_insn "*mmx_cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(vec_duplicate:V4SF
(float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
(match_operand:V4SF 1 "register_operand" "0")
(const_int 3)))]
- "TARGET_SSE"
+ "TARGET_SSE && !TARGET_MMX_WITH_SSE" ; Emits the real cvtpi2ps; off when TARGET_MMX_WITH_SSE.
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
--
2.20.1