On Fri, Jul 11, 2025 at 10:39 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> On Fri, Jul 11, 2025 at 4:23 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Fri, Jul 11, 2025 at 9:57 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Fri, Jul 11, 2025 at 6:05 AM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > > gcc/
> > > >
> > > > PR target/121015
> > > > * config/i386/constraints.md (BX): New constraint.
> > > > * config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX.
> > > > * config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Replace C with
> > > > BX for memory and integer register destination.  Replace <v,C>
> > > > with <v,BX>.
> > > > Update 32-bit MMXMODE move splitter to also split all 1s vector
> > > > source operand.
> > > > * config/i386/predicates.md (vector_const0_or_m1_operand): New
> > > > predicate.
> > > > (nonimm_or_vector_const0_or_m1_operand): Likewise.
> > > >
> > > > gcc/testsuite/
> > > >
> > > > PR target/121015
> > > > * gcc.target/i386/pr106022-2.c: Adjusted.
> > > > * gcc.target/i386/pr121015-1.c: New test.
> > > > * gcc.target/i386/pr121015-2.c: Likewise.
> > > > * gcc.target/i386/pr121015-3.c: Likewise.
> > > > * gcc.target/i386/pr121015-4.c: Likewise.
> > > > * gcc.target/i386/pr121015-5.c: Likewise.
> > > > * gcc.target/i386/pr121015-6.c: Likewise.
> > > >
> > > > OK for master?
> > >
> > > Please try the attached patch that introduces "all ones" handling to MMX 
> > > moves.
> >
> > Bah, wrong version attached (missing 32bit modes in mmxconstm1) -
> > please try this.
> >
> > Uros.
>
> Here are the source and the two assembly outputs generated with -O2 -march=x86-64-v3.
> My patch generates:
>
> movq $-1, %rax
> ...
> movq %rax, 4(%rcx)
> ...
> movq %rax, 4(%rcx)
> ...
> movq %rax, 4(%rcx)
>
> Yours generates:
>
> vpcmpeqd %xmm0, %xmm0, %xmm0
> ...
> vmovlps %xmm0, 4(%rdx)
> ...
> vpcmpeqd %xmm1, %xmm1, %xmm1
> ...
> vmovlps %xmm1, 4(%rdx)
> ...
> vpcmpeqd %xmm2, %xmm2, %xmm2
> ...
> vmovlps %xmm2, 4(%rdx)
>
> I prefer the assembly code generated by my patch.

Yes, I also noticed this issue after some more testing. The attached
patch revision adds an <m,BC> constraint alternative, which results in
even better code:

       movq    $-1, 4(%rdx)

Please note that we don't want this for 32-bit targets, where the above
would result in two stores. There, "vpcmpeqd %xmm1, %xmm1, %xmm1; vmovlps
%xmm1, 4(%rdx)" should be used instead.

Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index ad7360ec71a..c9b0ddf290c 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5448,6 +5448,8 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
            return 2;
          break;
        case 16:
+       case 8:
+       case 4:
          if (TARGET_SSE2)
            return 2;
          break;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 79202323e53..f5f4782101c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -111,6 +111,13 @@ (define_mode_attr mmxinsnmode
    (V4BF "DI") (V2BF "SI")
    (V2SF "DI")])
 
+;; MMX constant -1 constraint
+(define_mode_attr mmxconstm1
+  [(V8QI "BC") (V4HI "BC") (V2SI "BC") (V1DI "BC")
+   (V4QI "BC") (V2HI "BC") (V1SI "BC")
+   (V4HF "BF") (V4BF "BF") (V2SF "BF")
+   (V2HF "BF") (V2BF "BF")])
+
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
@@ -174,7 +181,7 @@ (define_mode_attr Yv_Yw
 
 (define_expand "mov<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
-       (match_operand:MMXMODE 1 "nonimm_or_0_operand"))]
+       (match_operand:MMXMODE 1 "nonimmediate_or_sse_const_operand"))]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -183,9 +190,9 @@ (define_expand "mov<mode>"
 
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
-    "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
-       (match_operand:MMXMODE 1 "nonimm_or_0_operand"
-    "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
+    "=r ,o ,r,r ,m ,m           ,?!y,!y,?!y,m  ,r  ,?!y,v,v           ,v,v,m,r,v,!y,*x")
+       (match_operand:MMXMODE 1 "nonimmediate_or_sse_const_operand"
+    "rCo,rC,C,rm,rC,<mmxconstm1>,C  ,!y,m  ,?!y,?!y,r  ,C,<mmxconstm1>,v,m,v,v,r,*x,!y"))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
@@ -230,31 +237,31 @@ (define_insn "*mov<mode>_internal"
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1")
              (const_string "nox64")
-           (eq_attr "alternative" "2,3,4,9,10")
+           (eq_attr "alternative" "2,3,4,10,11")
              (const_string "x64")
-           (eq_attr "alternative" "15,16")
+           (eq_attr "alternative" "5,17,18")
              (const_string "x64_sse2")
-           (eq_attr "alternative" "17,18")
+           (eq_attr "alternative" "13,19,20")
              (const_string "sse2")
           ]
           (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "0,1")
              (const_string "multi")
-           (eq_attr "alternative" "2,3,4")
+           (eq_attr "alternative" "2,3,4,5")
              (const_string "imov")
-           (eq_attr "alternative" "5")
+           (eq_attr "alternative" "6")
              (const_string "mmx")
-           (eq_attr "alternative" "6,7,8,9,10")
+           (eq_attr "alternative" "7,8,9,10,11")
              (const_string "mmxmov")
-           (eq_attr "alternative" "11")
+           (eq_attr "alternative" "12,13")
              (const_string "sselog1")
-           (eq_attr "alternative" "17,18")
+           (eq_attr "alternative" "19,20")
              (const_string "ssecvt")
           ]
           (const_string "ssemov")))
    (set (attr "prefix_rex")
-     (if_then_else (eq_attr "alternative" "9,10,15,16")
+     (if_then_else (eq_attr "alternative" "10,11,17,18")
        (const_string "1")
        (const_string "*")))
    (set (attr "prefix")
@@ -269,7 +276,7 @@ (define_insn "*mov<mode>_internal"
    (set (attr "mode")
      (cond [(eq_attr "alternative" "2")
              (const_string "SI")
-           (eq_attr "alternative" "11,12")
+           (eq_attr "alternative" "12,13,14")
              (cond [(match_test "<MODE>mode == V2SFmode
                                  || <MODE>mode == V4HFmode
                                  || <MODE>mode == V4BFmode")
@@ -280,7 +287,7 @@ (define_insn "*mov<mode>_internal"
                    ]
                    (const_string "TI"))
 
-           (and (eq_attr "alternative" "13")
+           (and (eq_attr "alternative" "15")
                 (ior (ior (and (match_test "<MODE>mode == V2SFmode")
                                (not (match_test "TARGET_MMX_WITH_SSE")))
                           (not (match_test "TARGET_SSE2")))
@@ -288,7 +295,7 @@ (define_insn "*mov<mode>_internal"
                                  || <MODE>mode == V4BFmode")))
              (const_string "V2SF")
 
-           (and (eq_attr "alternative" "14")
+           (and (eq_attr "alternative" "16")
                 (ior (ior (match_test "<MODE>mode == V2SFmode")
                           (not (match_test "TARGET_SSE2")))
                      (match_test "<MODE>mode == V4HFmode
@@ -297,9 +304,9 @@ (define_insn "*mov<mode>_internal"
           ]
           (const_string "DI")))
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "9,15")
+     (cond [(eq_attr "alternative" "10,17")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
-           (eq_attr "alternative" "10,16")
+           (eq_attr "alternative" "11,18")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])
@@ -329,7 +336,7 @@ (define_expand "movmisalign<mode>"
 
 (define_expand "mov<mode>"
   [(set (match_operand:V_32 0 "nonimmediate_operand")
-       (match_operand:V_32 1 "nonimmediate_operand"))]
+       (match_operand:V_32 1 "nonimmediate_or_sse_const_operand"))]
   ""
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -338,9 +345,9 @@ (define_expand "mov<mode>"
 
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:V_32 0 "nonimmediate_operand"
-    "=r ,m ,v,v,v,m,r,v")
-       (match_operand:V_32 1 "general_operand"
-    "rmC,rC,C,v,m,v,v,r"))]
+    "=r ,m ,m           ,v,v           ,v,v,m,r,v")
+       (match_operand:V_32 1 "nonimmediate_or_sse_const_operand"
+    "rmC,rC,<mmxconstm1>,C,<mmxconstm1>,v,m,v,v,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
@@ -360,14 +367,14 @@ (define_insn "*mov<mode>_internal"
     }
 }
   [(set (attr "isa")
-     (cond [(eq_attr "alternative" "6,7")
+     (cond [(eq_attr "alternative" "2,4,8,9")
              (const_string "sse2")
           ]
           (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "2")
+     (cond [(eq_attr "alternative" "3,4")
              (const_string "sselog1")
-           (eq_attr "alternative" "3,4,5,6,7")
+           (eq_attr "alternative" "5,6,7,8,9")
              (const_string "ssemov")
           ]
           (const_string "imov")))
@@ -380,7 +387,7 @@ (define_insn "*mov<mode>_internal"
        (const_string "1")
        (const_string "*")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "2,3")
+     (cond [(eq_attr "alternative" "3,4,5")
              (cond [(match_test "<MODE>mode == V2HFmode
                                 || <MODE>mode == V2BFmode")
                       (const_string "V4SF")
@@ -392,7 +399,7 @@ (define_insn "*mov<mode>_internal"
                    ]
                    (const_string "TI"))
 
-           (and (eq_attr "alternative" "4,5")
+           (and (eq_attr "alternative" "6,7")
                 (ior (match_test "<MODE>mode == V2HFmode
                                 || <MODE>mode == V2BFmode")
                      (not (match_test "TARGET_SSE2"))))
@@ -400,9 +407,9 @@ (define_insn "*mov<mode>_internal"
           ]
           (const_string "SI")))
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "6")
+     (cond [(eq_attr "alternative" "8")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
-           (eq_attr "alternative" "7")
+           (eq_attr "alternative" "9")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])

Reply via email to