All 0s and all 1s vectors of different modes can share a register: the
single widest all 0s/1s vector register can serve every all 0s/1s vector
use in the whole function.  Add a pass to generate a single widest all
0s/1s vector set instruction at entry of the nearest common dominator of
the basic blocks with all 0s/1s vector uses.  On Linux/x86-64, in cc1plus,
this patch reduces the number of vector xor instructions from 4803 to 4714
and the number of pcmpeq instructions from 144 to 142.

This change causes a regression:

FAIL: gcc.dg/rtl/x86_64/vector_eq.c

without the fix for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117863

NB: PR target/92080 and PR target/117839 aren't the same.  PR target/117839
is about vectors of all 0s and all 1s with different sizes and different
components.  PR target/92080 is about broadcasting the same component to
different vector sizes.  This patch covers only the all 0s and all 1s
cases of PR target/92080.

gcc/

        PR target/92080
        PR target/117839
        * config/i386/i386-features.cc (ix86_rrvl_gate): New.
        (ix86_place_single_vector_set): Likewise.
        (ix86_get_vector_load_mode): Likewise.
        (remove_redundant_vector_load): Likewise.
        (pass_data_remove_redundant_vector_load): Likewise.
        (pass_remove_redundant_vector_load): Likewise.
        (make_pass_remove_redundant_vector_load): Likewise.
        * config/i386/i386-passes.def: Add
        pass_remove_redundant_vector_load after
        pass_remove_partial_avx_dependency.
        * config/i386/i386-protos.h
        (make_pass_remove_redundant_vector_load): New.

gcc/testsuite/

        PR target/92080
        PR target/117839
        * gcc.target/i386/pr117839-1a.c: New test.
        * gcc.target/i386/pr117839-1b.c: Likewise.
        * gcc.target/i386/pr117839-2.c: Likewise.
        * gcc.target/i386/pr92080-1.c: Likewise.
        * gcc.target/i386/pr92080-2.c: Likewise.
        * gcc.target/i386/pr92080-3.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386-features.cc            | 308 ++++++++++++++++++++
 gcc/config/i386/i386-passes.def             |   1 +
 gcc/config/i386/i386-protos.h               |   2 +
 gcc/testsuite/gcc.target/i386/pr117839-1a.c |  35 +++
 gcc/testsuite/gcc.target/i386/pr117839-1b.c |   5 +
 gcc/testsuite/gcc.target/i386/pr117839-2.c  |  40 +++
 gcc/testsuite/gcc.target/i386/pr92080-1.c   |  54 ++++
 gcc/testsuite/gcc.target/i386/pr92080-2.c   |  59 ++++
 gcc/testsuite/gcc.target/i386/pr92080-3.c   |  48 +++
 9 files changed, 552 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117839-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117839-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117839-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92080-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92080-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92080-3.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 003b003e09c..7d8d260750d 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3288,6 +3288,314 @@ make_pass_remove_partial_avx_dependency (gcc::context 
*ctxt)
   return new pass_remove_partial_avx_dependency (ctxt);
 }
 
+static bool
+ix86_rrvl_gate ()
+{
+  return (TARGET_SSE2
+         && optimize
+         && optimize_function_for_speed_p (cfun));
+}
+
+/* Emit the vector set DEST = SRC ahead of the first non-debug insn (or
+   at the end if there is none) of the nearest common dominator of the
+   basic blocks in bitmap BBS, hoisted into the fake loop that contains
+   the whole function, so the whole function has a single vector set.  */
+
+static void
+ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs)
+{
+  basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+  while (bb->loop_father->latch
+        != EXIT_BLOCK_PTR_FOR_FN (cfun))
+    bb = get_immediate_dominator (CDI_DOMINATORS,
+                                 bb->loop_father->header);
+
+  rtx set = gen_rtx_SET (dest, src);
+
+  rtx_insn *insn = BB_HEAD (bb);
+  while (insn && !NONDEBUG_INSN_P (insn))
+    {
+      if (insn == BB_END (bb))
+       {
+         insn = NULL;
+         break;
+       }
+      insn = NEXT_INSN (insn);
+    }
+
+  rtx_insn *set_insn;
+  if (insn == BB_HEAD (bb))
+    set_insn = emit_insn_before (set, insn);
+  else
+    set_insn = emit_insn_after (set,
+                               insn ? PREV_INSN (insn) : BB_END (bb));
+  df_insn_rescan (set_insn);
+}
+
+/* Return V64QImode/V32QImode for SIZE 64/32 bytes, V16QImode otherwise.  */
+
+static machine_mode
+ix86_get_vector_load_mode (unsigned int size)
+{
+  machine_mode mode;
+  if (size == 64)
+    mode = V64QImode;
+  else if (size == 32)
+    mode = V32QImode;
+  else
+    mode = V16QImode;
+  return mode;
+}
+
+/* At entry of the nearest common dominator for basic blocks with vector
+   CONST0_RTX and integer CONSTM1_RTX loads, generate a single widest
+   vector set instruction and redirect the uses of singly-defined
+   CONST0_RTX and integer CONSTM1_RTX registers to it.
+
+   NB: We want to generate only a single widest vector set to cover the
+   whole function.  The LCM algorithm isn't appropriate here since it
+   may place a vector set inside the loop.  */
+
+static unsigned int
+remove_redundant_vector_load (void)
+{
+  timevar_push (TV_MACH_DEP);
+
+  bitmap_obstack_initialize (NULL);
+  bitmap zero_bbs = BITMAP_ALLOC (NULL);
+  bitmap m1_bbs = BITMAP_ALLOC (NULL);
+  bitmap vector_insns = BITMAP_ALLOC (NULL);
+
+  basic_block bb;
+  rtx_insn *insn;
+  rtx set;
+  unsigned HOST_WIDE_INT zero_count = 0;
+  unsigned HOST_WIDE_INT m1_count = 0;
+  unsigned int zero_size = 0;
+  unsigned int m1_size = 0;
+
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      FOR_BB_INSNS (bb, insn)
+       {
+         if (!NONDEBUG_INSN_P (insn))
+           continue;
+
+         set = single_set (insn);
+         if (!set)
+           continue;
+
+         rtx dest = SET_DEST (set);
+         machine_mode mode = GET_MODE (dest);
+         /* Skip non-vector instruction.  */
+         if (!VECTOR_MODE_P (mode))
+           continue;
+
+         rtx src = SET_SRC (set);
+         if (!REG_P (dest)
+             || (src != CONST0_RTX (mode)
+                 && !(GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+                      && src == CONSTM1_RTX (mode))))
+           {
+             /* Record non-CONST0_RTX/CONSTM1_RTX vector instruction.  */
+             bitmap_set_bit (vector_insns, INSN_UID (insn));
+             continue;
+           }
+
+         if (src == CONST0_RTX (mode))
+           {
+             /* Record the maximum vector size.  */
+             if (zero_size < GET_MODE_SIZE (mode))
+               zero_size = GET_MODE_SIZE (mode);
+
+             /* Record the basic block with CONST0_RTX.  */
+             bitmap_set_bit (zero_bbs, bb->index);
+             zero_count++;
+           }
+         else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+                  && src == CONSTM1_RTX (mode))
+           {
+             /* Record the maximum vector size.  */
+             if (m1_size < GET_MODE_SIZE (mode))
+               m1_size = GET_MODE_SIZE (mode);
+
+             /* Record the basic block with CONSTM1_RTX.  */
+             bitmap_set_bit (m1_bbs, bb->index);
+             m1_count++;
+           }
+       }
+    }
+
+  if (zero_count > 1 || m1_count > 1)
+    {
+      machine_mode zero_mode, m1_mode;
+      rtx vector_const0, vector_constm1;
+      if (zero_count > 1)
+       {
+         zero_mode = ix86_get_vector_load_mode (zero_size);
+         vector_const0 = gen_reg_rtx (zero_mode);
+       }
+      else
+       {
+         zero_mode = VOIDmode;
+         vector_const0 = nullptr;
+       }
+      if (m1_count > 1)
+       {
+         m1_mode = ix86_get_vector_load_mode (m1_size);
+         vector_constm1 = gen_reg_rtx (m1_mode);
+       }
+      else
+       {
+         m1_mode = VOIDmode;
+         vector_constm1 = nullptr;
+       }
+
+      bool zero_replaced = false;
+      bool m1_replaced = false;
+
+      bitmap_iterator bi;
+      unsigned id;
+      EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
+       {
+         /* Replace registers loaded with CONST0_RTX or integer CONSTM1_RTX
+            by the single widest CONST0_RTX or CONSTM1_RTX register.  */
+         df_ref ref, def;
+         insn = DF_INSN_UID_GET (id)->insn;
+         bool replaced = false;
+
+         for (ref = DF_INSN_UID_USES (id);
+              ref;
+              ref = DF_REF_NEXT_LOC (ref))
+           {
+             if (DF_REF_TYPE (ref) != DF_REF_REG_USE)
+               continue;
+
+             /* Skip non-vector register.  */
+             rtx reg = DF_REF_REG (ref);
+             if (!VECTOR_MODE_P (GET_MODE (reg)))
+               continue;
+
+             /* Skip registers without exactly one definition.  */
+             def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
+             if (!def || DF_REF_NEXT_REG (def) != nullptr)
+               continue;
+
+             /* Get the single definition.  */
+             rtx_insn *def_insn = DF_REF_INSN (def);
+             set = single_set (def_insn);
+             if (!set)
+               continue;
+
+             /* Check whether the single definition is all 0s or all 1s.  */
+             rtx src = SET_SRC (set);
+             rtx replace;
+             if (vector_const0 && src == CONST0_RTX (GET_MODE (src)))
+               {
+                 /* Replace REG with VECTOR_CONST0.  */
+                 if (SUBREG_P (reg) || GET_MODE (reg) == zero_mode)
+                   replace = vector_const0;
+                 else
+                   replace = gen_rtx_SUBREG (GET_MODE (reg),
+                                             vector_const0, 0);
+                 *DF_REF_REAL_LOC (ref) = replace;
+                 replaced = true;
+                 zero_replaced = true;
+               }
+             else if (vector_constm1
+                      && src == CONSTM1_RTX (GET_MODE (src)))
+               {
+                 /* Replace REG with VECTOR_CONSTM1.  */
+                 if (SUBREG_P (reg) || GET_MODE (reg) == m1_mode)
+                   replace = vector_constm1;
+                 else
+                   replace = gen_rtx_SUBREG (GET_MODE (reg),
+                                             vector_constm1, 0);
+                 *DF_REF_REAL_LOC (ref) = replace;
+                 replaced = true;
+                 m1_replaced = true;
+               }
+           }
+
+         if (replaced)
+           df_insn_rescan (insn);
+       }
+
+      /* (Re-)discover loops so that bb->loop_father can be used in the
+        analysis below.  */
+      calculate_dominance_info (CDI_DOMINATORS);
+      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+      if (zero_replaced)
+       ix86_place_single_vector_set (vector_const0,
+                                     CONST0_RTX (zero_mode),
+                                     zero_bbs);
+
+      if (m1_replaced)
+       ix86_place_single_vector_set (vector_constm1,
+                                     CONSTM1_RTX (m1_mode),
+                                     m1_bbs);
+
+      loop_optimizer_finalize ();
+
+      df_process_deferred_rescans ();
+    }
+
+  df_clear_flags (DF_DEFER_INSN_RESCAN);
+
+  bitmap_obstack_release (NULL);
+  BITMAP_FREE (zero_bbs);
+  BITMAP_FREE (m1_bbs);
+  BITMAP_FREE (vector_insns);
+
+  timevar_pop (TV_MACH_DEP);
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_remove_redundant_vector_load =
+{
+  RTL_PASS, /* type */
+  "rrvl", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_MACH_DEP, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_remove_redundant_vector_load : public rtl_opt_pass
+{
+public:
+  pass_remove_redundant_vector_load (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) final override
+    {
+      return ix86_rrvl_gate ();
+    }
+
+  unsigned int execute (function *) final override
+    {
+      return remove_redundant_vector_load ();
+    }
+}; // class pass_remove_redundant_vector_load
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+{
+  return new pass_remove_redundant_vector_load (ctxt);
+}
+
 /* Convert legacy instructions that clobbers EFLAGS to APX_NF
    instructions when there are no flag set between a flag
    producer and user.  */
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index a9d350dcfca..df424cdb9c7 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,5 +35,6 @@ along with GCC; see the file COPYING3.  If not see
      PR116174.  */
   INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
 
+  INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
   INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
   INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 78e72c50c6d..4c3a8bd326c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -426,6 +426,8 @@ extern rtl_opt_pass 
*make_pass_insert_endbr_and_patchable_area
   (gcc::context *);
 extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
   (gcc::context *);
+extern rtl_opt_pass *make_pass_remove_redundant_vector_load
+  (gcc::context *);
 extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
 extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
 
diff --git a/gcc/testsuite/gcc.target/i386/pr117839-1a.c 
b/gcc/testsuite/gcc.target/i386/pr117839-1a.c
new file mode 100644
index 00000000000..4501cfbcad4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117839-1a.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t 
\]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */
+
+#include <stddef.h>
+
+void
+clear_memory (void *mem1, size_t nclears1, void *mem2, size_t nclears2)
+{
+  size_t *d1 = (size_t *) mem1;
+
+  *(d1 + 0) = 0;
+  *(d1 + 1) = 0;
+  *(d1 + 2) = 0;
+  if (nclears1 > 3)
+    {
+      *(d1 + nclears1 - 4) = 0;
+      *(d1 + nclears1 - 4 + 1) = 0;
+      *(d1 + nclears1 - 4 + 2) = 0;
+      *(d1 + nclears1 - 4 + 3) = 0;
+    }
+
+  double *d2 = (double *) mem2;
+
+  *(d2 + 0) = 0;
+  *(d2 + 1) = 0;
+  *(d2 + 2) = 0;
+  if (nclears2 > 3)
+    {
+      *(d2 + nclears2 - 4) = 0;
+      *(d2 + nclears2 - 4 + 1) = 0;
+      *(d2 + nclears2 - 4 + 2) = 0;
+      *(d2 + nclears2 - 4 + 3) = 0;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117839-1b.c 
b/gcc/testsuite/gcc.target/i386/pr117839-1b.c
new file mode 100644
index 00000000000..e71b991a207
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117839-1b.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t 
\]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */
+
+#include "pr117839-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr117839-2.c 
b/gcc/testsuite/gcc.target/i386/pr117839-2.c
new file mode 100644
index 00000000000..c76744cf98b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117839-2.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t 
\]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */
+
+#include <stddef.h>
+
+float
+clear_memory (void *mem, size_t clearsize)
+{
+  size_t *d = (size_t *) mem;
+  size_t nclears = clearsize / sizeof (size_t);
+
+  *(d + 0) = 0;
+  *(d + 1) = 0;
+  *(d + 2) = 0;
+  if (nclears > 9)
+    {
+      *(d + 5) = 0;
+      *(d + 5 + 1) = 0;
+      *(d + 5 + 2) = 0;
+      *(d + 5 + 3) = 0;
+      *(d + nclears - 8) = 0;
+      *(d + nclears - 8 + 1) = 0;
+      *(d + nclears - 8 + 2) = 0;
+      *(d + nclears - 8 + 3) = 0;
+    }
+  else
+    {
+      *(d + 1) = 0;
+      *(d + 2) = 0;
+      *(d + 3) = 0;
+      *(d + 4) = 0;
+      *(d + nclears - 4) = 0;
+      *(d + nclears - 4 + 1) = 0;
+      *(d + nclears - 4 + 2) = 0;
+      *(d + nclears - 4 + 3) = 0;
+    }
+
+  return nclears;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr92080-1.c 
b/gcc/testsuite/gcc.target/i386/pr92080-1.c
new file mode 100644
index 00000000000..7059b4514eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92080-1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-times "vpxor" 2 } } */
+/* { dg-final { scan-assembler-times "vpcmpeq" 2 } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+
+v16qi b1;
+v8hi h1;
+v4si s1;
+v32qi b2;
+v16hi h2;
+v8si s2;
+
+void
+foo ()
+{
+  s1 = __extension__(v4si){0, 0, 0, 0};
+  h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+  b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  h2 = __extension__(v16hi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+void
+foo1 ()
+{
+  s1 = __extension__(v4si){-1, -1, -1, -1};
+  h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+  b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1};
+}
+
+
+void
+foo2 ()
+{
+  s2 = __extension__(v8si){0, 0, 0, 0, 0, 0, 0, 0};
+  h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+  b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  b2 = __extension__(v32qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+void
+foo3 ()
+{
+  s2 = __extension__(v8si){-1, -1, -1, -1, -1, -1, -1, -1};
+  h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+  b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr92080-2.c 
b/gcc/testsuite/gcc.target/i386/pr92080-2.c
new file mode 100644
index 00000000000..d160d90de53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92080-2.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-times "vpxor" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeq" 1 } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+
+v16qi b1;
+v8hi h1;
+v4si s1;
+v32qi b2;
+v16hi h2;
+v8si s2;
+
+void
+foo (int i, int j)
+{
+  switch (i)
+    {
+    case 1:
+      h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+      s1 = __extension__(v4si){0, 0, 0, 0};
+      s2 = __extension__(v8si){0, 0, 0, 0, 0, 0, 0, 0};
+      break;
+    case 2:
+      h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+      b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1};
+      break;
+    case 3:
+      h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+      b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0};
+      break;
+    default:
+      break;
+    }
+
+  switch (i)
+    {
+    case 1:
+      s1 = __extension__(v4si){-1, -1, -1, -1};
+      b2 = __extension__(v32qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+      h2 = __extension__(v16hi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0};
+      break;
+    case 2:
+      b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0};
+      h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+      break;
+    case 3:
+      b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1};
+      s2 = __extension__(v8si){-1, -1, -1, -1, -1, -1, -1, -1};
+      break;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr92080-3.c 
b/gcc/testsuite/gcc.target/i386/pr92080-3.c
new file mode 100644
index 00000000000..2174def4e6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92080-3.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+/* { dg-final { scan-assembler-times "pxor" 1 } } */
+/* { dg-final { scan-assembler-times "pcmpeq" 1 } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef char v16qi __attribute__((vector_size(16)));
+
+v16qi b1;
+v8hi h1;
+v4si s1;
+
+void
+foo (int i, int j)
+{
+  switch (i)
+    {
+    case 1:
+      h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+      s1 = __extension__(v4si){0, 0, 0, 0};
+      break;
+    case 2:
+      h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+      b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1};
+      break;
+    case 3:
+      h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
+      b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0};
+      break;
+    default:
+      break;
+    }
+
+  switch (i)
+    {
+    case 1:
+      s1 = __extension__(v4si){-1, -1, -1, -1};
+      break;
+    case 2:
+      b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0};
+      h1 = __extension__(v8hi){-1, -1, -1, -1, -1, -1, -1, -1};
+      break;
+    case 3:
+      b1 = __extension__(v16qi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1};
+      break;
+    }
+}
-- 
2.47.1

Reply via email to