For EVEX-encoded extended instructions, AVX512VL is required when the
vector length is less than 512 bits; in addition, some instructions such
as vpmovzxbw also require AVX512BW.  This patch therefore refines the
corresponding constraints, i.e. from "v/vm" to "Yv/Yvm" and, where
AVX512BW is also needed, from "v/vm" to "Yw/Ywm".

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
Ok for trunk, and for backporting to GCC 10 and GCC 11?

BTW, I'm not sure whether this patch should be backported to GCC 8 and
GCC 9, since the "Yw" constraint was introduced in GCC 10; if we want to
do that, we also need to backport the definition of "Yw".

gcc/ChangeLog:

        PR target/100885
        * config/i386/sse.md (*sse4_1_zero_extendv8qiv8hi2_3): Refine
        constraints.
        (avx2_<code>v8qiv8si2<mask_name>): Ditto.
        (*avx2_<code>v8qiv8si2<mask_name>_1): Ditto.
        (sse4_1_<code>v4qiv4si2<mask_name>): Ditto.
        (*sse4_1_<code>v4qiv4si2<mask_name>_1): Ditto.
        (avx2_<code>v8hiv8si2<mask_name>): Ditto.
        (avx2_zero_extendv8hiv8si2_1): Ditto.
        (sse4_1_<code>v4hiv4si2<mask_name>): Ditto.
        (*sse4_1_<code>v4hiv4si2<mask_name>_1): Ditto.
        (*sse4_1_zero_extendv4hiv4si2_3): Ditto.
        (avx2_<code>v4qiv4di2<mask_name>): Ditto.
        (*avx2_<code>v4qiv4di2<mask_name>_1): Ditto.
        (sse4_1_<code>v2qiv2di2<mask_name>): Ditto.
        (avx2_<code>v4hiv4di2<mask_name>): Ditto.
        (*avx2_<code>v4hiv4di2<mask_name>_1): Ditto.
        (sse4_1_<code>v2hiv2di2<mask_name>): Ditto.
        (*sse4_1_<code>v2hiv2di2<mask_name>_1): Ditto.
        (avx2_<code>v4siv4di2<mask_name>): Ditto.
        (*avx2_zero_extendv4siv4di2_1): Ditto.
        (<insn>v4siv4di2): Ditto.
        (sse4_1_<code>v2siv2di2<mask_name>): Ditto.
        (*sse4_1_<code>v2siv2di2<mask_name>_1): Ditto.
        (*sse4_1_zero_extendv2siv2di2_3): Ditto.

gcc/testsuite/ChangeLog:

        PR target/100885
        * g++.target/i386/pr100885.C: New test.
---
 gcc/config/i386/sse.md                   |  78 +++++------
 gcc/testsuite/g++.target/i386/pr100885.C | 159 +++++++++++++++++++++++
 2 files changed, 198 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr100885.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a4503ddcb73..42e62fd4f89 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18101,10 +18101,10 @@ (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
   "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
 
 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
-  [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
        (vec_select:V16QI
          (vec_concat:V32QI
-           (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,vm")
+           (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,Ywm")
            (match_operand:V16QI 2 "const0_operand" "C,C,C"))
          (match_parallel 3 "pmovzx_parallel"
            [(match_operand 4 "const_int_operand" "n,n,n")])))]
@@ -18163,10 +18163,10 @@ (define_expand "<insn>v16qiv16si2"
   "TARGET_AVX512F")
 
 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
-  [(set (match_operand:V8SI 0 "register_operand" "=v")
+  [(set (match_operand:V8SI 0 "register_operand" "=Yv")
        (any_extend:V8SI
          (vec_select:V8QI
-           (match_operand:V16QI 1 "register_operand" "v")
+           (match_operand:V16QI 1 "register_operand" "Yv")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
@@ -18179,7 +18179,7 @@ (define_insn "avx2_<code>v8qiv8si2<mask_name>"
    (set_attr "mode" "OI")])
 
 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
-  [(set (match_operand:V8SI 0 "register_operand" "=v")
+  [(set (match_operand:V8SI 0 "register_operand" "=Yv")
        (any_extend:V8SI
          (match_operand:V8QI 1 "memory_operand" "m")))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
@@ -18225,10 +18225,10 @@ (define_expand "<insn>v8qiv8si2"
 })
 
 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
-  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V4SI
          (vec_select:V4QI
-           (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
+           (match_operand:V16QI 1 "register_operand" "Yr,*x,Yv")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18240,7 +18240,7 @@ (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
    (set_attr "mode" "TI")])
 
 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
-  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V4SI
          (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18322,9 +18322,9 @@ (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
 })
 
 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
-  [(set (match_operand:V8SI 0 "register_operand" "=v")
+  [(set (match_operand:V8SI 0 "register_operand" "=Yv")
        (any_extend:V8SI
-           (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
+           (match_operand:V8HI 1 "nonimmediate_operand" "Yvm")))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
   "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
   [(set_attr "type" "ssemov")
@@ -18339,10 +18339,10 @@ (define_expand "<insn>v8hiv8si2"
   "TARGET_AVX2")
 
 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
-  [(set (match_operand:V16HI 0 "register_operand" "=v")
+  [(set (match_operand:V16HI 0 "register_operand" "=Yv")
        (vec_select:V16HI
          (vec_concat:V32HI
-           (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+           (match_operand:V16HI 1 "nonimmediate_operand" "Yvm")
            (match_operand:V16HI 2 "const0_operand" "C"))
          (match_parallel 3 "pmovzx_parallel"
            [(match_operand 4 "const_int_operand" "n")])))]
@@ -18356,10 +18356,10 @@ (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
 })
 
 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
-  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V4SI
          (vec_select:V4HI
-           (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
+           (match_operand:V8HI 1 "register_operand" "Yr,*x,Yv")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18371,7 +18371,7 @@ (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
    (set_attr "mode" "TI")])
 
 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
-  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V4SI
          (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18416,10 +18416,10 @@ (define_expand "<insn>v4hiv4si2"
 })
 
 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
-  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yv")
        (vec_select:V8HI
          (vec_concat:V16HI
-           (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
+           (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,Yvm")
            (match_operand:V8HI 2 "const0_operand" "C,C,C"))
          (match_parallel 3 "pmovzx_parallel"
            [(match_operand 4 "const_int_operand" "n,n,n")])))]
@@ -18504,10 +18504,10 @@ (define_expand "<insn>v8qiv8di2"
 })
 
 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand" "=Yv")
        (any_extend:V4DI
          (vec_select:V4QI
-           (match_operand:V16QI 1 "register_operand" "v")
+           (match_operand:V16QI 1 "register_operand" "Yv")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
@@ -18518,7 +18518,7 @@ (define_insn "avx2_<code>v4qiv4di2<mask_name>"
    (set_attr "mode" "OI")])
 
 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand" "=Yv")
        (any_extend:V4DI
          (match_operand:V4QI 1 "memory_operand" "m")))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
@@ -18566,10 +18566,10 @@ (define_expand "<insn>v4qiv4di2"
 })
 
 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
-  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V2DI
          (vec_select:V2QI
-           (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
+           (match_operand:V16QI 1 "register_operand" "Yr,*x,Yv")
            (parallel [(const_int 0) (const_int 1)]))))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -18608,10 +18608,10 @@ (define_expand "<insn>v8hiv8di2"
   "TARGET_AVX512F")
 
 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand" "=Yv")
        (any_extend:V4DI
          (vec_select:V4HI
-           (match_operand:V8HI 1 "register_operand" "v")
+           (match_operand:V8HI 1 "register_operand" "Yv")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
@@ -18622,7 +18622,7 @@ (define_insn "avx2_<code>v4hiv4di2<mask_name>"
    (set_attr "mode" "OI")])
 
 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand" "=Yv")
        (any_extend:V4DI
          (match_operand:V4HI 1 "memory_operand" "m")))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
@@ -18666,10 +18666,10 @@ (define_expand "<insn>v4hiv4di2"
 })
 
 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
-  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V2DI
          (vec_select:V2HI
-           (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
+           (match_operand:V8HI 1 "register_operand" "Yr,*x,Yv")
            (parallel [(const_int 0) (const_int 1)]))))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -18680,7 +18680,7 @@ (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
    (set_attr "mode" "TI")])
 
 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
-  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V2DI
          (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18761,9 +18761,9 @@ (define_expand "<insn>v8siv8di2"
   "TARGET_AVX512F")
 
 (define_insn "avx2_<code>v4siv4di2<mask_name>"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand" "=Yv")
        (any_extend:V4DI
-           (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
+           (match_operand:V4SI 1 "nonimmediate_operand" "Yvm")))]
   "TARGET_AVX2 && <mask_avx512vl_condition>"
   "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
   [(set_attr "type" "ssemov")
@@ -18772,10 +18772,10 @@ (define_insn "avx2_<code>v4siv4di2<mask_name>"
    (set_attr "mode" "OI")])
 
 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
-  [(set (match_operand:V8SI 0 "register_operand" "=v")
+  [(set (match_operand:V8SI 0 "register_operand" "=Yv")
        (vec_select:V8SI
          (vec_concat:V16SI
-           (match_operand:V8SI 1 "nonimmediate_operand" "vm")
+           (match_operand:V8SI 1 "nonimmediate_operand" "Yvm")
            (match_operand:V8SI 2 "const0_operand" "C"))
          (match_parallel 3 "pmovzx_parallel"
            [(match_operand 4 "const_int_operand" "n")])))]
@@ -18789,16 +18789,16 @@ (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
 })
 
 (define_expand "<insn>v4siv4di2"
-  [(set (match_operand:V4DI 0 "register_operand" "=v")
+  [(set (match_operand:V4DI 0 "register_operand")
        (any_extend:V4DI
-           (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
+           (match_operand:V4SI 1 "nonimmediate_operand")))]
   "TARGET_AVX2")
 
 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
-  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V2DI
          (vec_select:V2SI
-           (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
+           (match_operand:V4SI 1 "register_operand" "Yr,*x,Yv")
            (parallel [(const_int 0) (const_int 1)]))))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
   "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -18809,7 +18809,7 @@ (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
    (set_attr "mode" "TI")])
 
 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
-  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,Yv")
        (any_extend:V2DI
          (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -18838,10 +18838,10 @@ (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
   "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
 
 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
-  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,Yv")
        (vec_select:V4SI
          (vec_concat:V8SI
-           (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
+           (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,Yvm")
            (match_operand:V4SI 2 "const0_operand" "C,C,C"))
          (match_parallel 3 "pmovzx_parallel"
            [(match_operand 4 "const_int_operand" "n,n,n")])))]
diff --git a/gcc/testsuite/g++.target/i386/pr100885.C b/gcc/testsuite/g++.target/i386/pr100885.C
new file mode 100644
index 00000000000..09e11c033b9
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100885.C
@@ -0,0 +1,159 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512vl -mno-avx512bw -O2 -Wno-int-to-pointer-cast -std=c++14" } */
+
+typedef unsigned char byte;
+enum ZoomLevel { ZOOM_LVL_COUNT };
+struct Colour {
+  unsigned data;
+  Colour(int data) : data(data) {}
+};
+struct Palette {
+  Colour palette[6];
+};
+enum BlitterMode { BM_COLOUR_REMAP };
+class Blitter {
+public:
+  struct BlitterParams {
+    int width;
+    int height;
+    int left;
+    int top;
+    void *dst;
+  };
+  virtual void Draw();
+};
+class Blitter_32bppAnim : public Blitter {
+protected:
+  unsigned short anim_buf;
+  Palette palette;
+  int LookupColourInPalette_index;
+  Colour LookupColourInPalette() {
+    return palette.palette[LookupColourInPalette_index];
+  }
+};
+typedef int __m64 __attribute__((__vector_size__(8)));
+typedef int __m64_u __attribute__((__vector_size__(8), __may_alias__));
+typedef long long __v2di __attribute__((__vector_size__(16)));
+typedef short __v8hi __attribute__((__vector_size__(16)));
+typedef short __v8hu __attribute__((__vector_size__(16)));
+typedef char __v16qi __attribute__((__vector_size__(16)));
+typedef long long __m128i __attribute__((__vector_size__(16)));
+__m128i _mm_set_epi64(__m64 __q0) {
+  __m128i __trans_tmp_5{(long)__q0};
+  return __trans_tmp_5;
+}
+long _mm_storel_epi64___P, Draw_dsts;
+__m128i _mm_packus_epi16___B, _mm_subs_epu16___B, _mm_hadd_epi16___Y,
+    Draw_srcABCD, Draw___trans_tmp_10, Draw___trans_tmp_29, Draw___trans_tmp_7,
+    AlphaBlendTwoPixels___trans_tmp_12, AlphaBlendTwoPixels___trans_tmp_11,
+    AdjustBrightnessOfTwoPixels_from;
+__m128i _mm_unpacklo_epi8(__m128i __A) {
+  __m128i __B;
+  return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)__A, (__v16qi)__B);
+}
+int _mm_srli_epi16___B;
+class Blitter_32bppSSE_Base {
+public:
+  enum ReadMode { RM_WITH_MARGIN };
+  enum BlockType { BT_NONE };
+  struct SpriteData {
+    int infos[ZOOM_LVL_COUNT];
+    byte data;
+  };
+};
+byte *Draw_remap;
+short Draw_si_0;
+class Blitter_32bppSSE4_Anim : Blitter_32bppAnim, Blitter_32bppSSE_Base {
+  template <BlitterMode, ReadMode, BlockType, bool, bool>
+  void Draw(const BlitterParams *, ZoomLevel);
+  void Draw();
+};
+__m128i AdjustBrightnessOfTwoPixels() {
+  __m128i __trans_tmp_28, __trans_tmp_27, __trans_tmp_26, __trans_tmp_24,
+      __trans_tmp_23, __trans_tmp_22, __trans_tmp_21, __trans_tmp_20,
+      __trans_tmp_19, __trans_tmp_18, __trans_tmp_17, __trans_tmp_16,
+      __trans_tmp_14 = (__m128i)__builtin_ia32_psrlwi128(
+          (__v8hi)AdjustBrightnessOfTwoPixels_from, _mm_srli_epi16___B),
+      __trans_tmp_7;
+  char __trans_tmp_8;
+  __trans_tmp_7 = __m128i{__trans_tmp_8};
+  {
+    __m128i __trans_tmp_7;
+    char __trans_tmp_8;
+    __trans_tmp_7 = __m128i{__trans_tmp_8};
+    __trans_tmp_26 = __trans_tmp_7;
+  }
+  __trans_tmp_16 = (__v8hi)__trans_tmp_14 > (__v8hi)__trans_tmp_26;
+  __trans_tmp_17 = (__m128i)__builtin_ia32_phaddw128(
+      (__v8hi)__trans_tmp_16, (__v8hi)_mm_hadd_epi16___Y);
+  __trans_tmp_18 = (__m128i)__builtin_ia32_phaddw128(
+      (__v8hi)__trans_tmp_17, (__v8hi)_mm_hadd_epi16___Y);
+  __trans_tmp_19 = (__m128i)__builtin_ia32_psrlwi128((__v8hi)__trans_tmp_18,
+                                                     _mm_srli_epi16___B);
+  {
+    __m128i __trans_tmp_7;
+    char __trans_tmp_8;
+    __trans_tmp_7 = __m128i{__trans_tmp_8};
+    __trans_tmp_27 = __trans_tmp_7;
+  }
+  __trans_tmp_20 = (__m128i)__builtin_ia32_pshufb128((__v16qi)__trans_tmp_19,
+                                                     (__v16qi)__trans_tmp_27);
+  {
+    __m128i __trans_tmp_7;
+    char __trans_tmp_8;
+    __trans_tmp_7 = __m128i{__trans_tmp_8};
+    __trans_tmp_28 = __trans_tmp_7;
+  }
+  __trans_tmp_21 = (__m128i)__builtin_ia32_psubusw128(
+      (__v8hi)__trans_tmp_28, (__v8hi)_mm_subs_epu16___B);
+  __trans_tmp_22 = __m128i((__v8hu)__trans_tmp_21 * (__v8hu)__trans_tmp_20);
+  __trans_tmp_23 = __m128i((__v8hu)__trans_tmp_22 + (__v8hu)__trans_tmp_7);
+  __trans_tmp_24 = (__m128i)__builtin_ia32_packuswb128(
+      (__v8hi)__trans_tmp_23, (__v8hi)_mm_packus_epi16___B);
+  return __trans_tmp_24;
+}
+template <BlitterMode, Blitter_32bppSSE_Base::ReadMode,
+          Blitter_32bppSSE_Base::BlockType, bool, bool>
+void Blitter_32bppSSE4_Anim::Draw(const BlitterParams *bp, ZoomLevel zoom) {
+  __m128i __trans_tmp_30;
+  Colour *dst_line = (Colour *)bp->dst + bp->left;
+  unsigned short *anim_line = &anim_buf + bp->top;
+  int effective_width;
+  SpriteData *sd = (SpriteData *)bp;
+  Colour *src_rgba_line = (Colour *)sd->data;
+  Draw___trans_tmp_29 = Draw___trans_tmp_7;
+  for (int y = bp->height; y; y--) {
+    Colour *dst = dst_line;
+    unsigned short *anim = anim_line;
+    anim += src_rgba_line[0].data;
+    dst += src_rgba_line[0].data;
+    int width_diff = Draw_si_0 - bp->width;
+    effective_width = width_diff ?: effective_width;
+    for (int x = effective_width; x; x--) {
+      int mvX2 = *(unsigned *)sd->infos[zoom], m = byte(mvX2);
+      __trans_tmp_30 = _mm_set_epi64(*(__m64_u *)dst);
+      Colour c0 = Draw_dsts, srcm(0), cmap = LookupColourInPalette().data & 40;
+      c0 = Draw_remap[m] ?: cmap;
+      c0 = m ? c0 : srcm;
+      Draw___trans_tmp_10 = __v2di{c0.data};
+      if (mvX2)
+        Draw_srcABCD = AdjustBrightnessOfTwoPixels();
+      if (src_rgba_line)
+        anim[1] = 0;
+      __m128i dstAB = _mm_unpacklo_epi8(__trans_tmp_30);
+      AlphaBlendTwoPixels___trans_tmp_12 =
+          __m128i((__v8hu)Draw_srcABCD + (__v8hu)dstAB);
+      AlphaBlendTwoPixels___trans_tmp_11 = (__m128i)__builtin_ia32_pshufb128(
+          (__v16qi)AlphaBlendTwoPixels___trans_tmp_12,
+          (__v16qi)Draw___trans_tmp_7);
+      *(__m64_u *)_mm_storel_epi64___P =
+          (__m64)AlphaBlendTwoPixels___trans_tmp_11[0];
+    }
+  }
+}
+Blitter::BlitterParams Draw_bp;
+ZoomLevel Draw_zoom;
+void Blitter_32bppSSE4_Anim::Draw() {
+  Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true, false>(&Draw_bp,
+                                                              Draw_zoom);
+}
-- 
2.18.1

Reply via email to