Hi,

On Fri, Feb 4, 2022 at 10:43 AM Richard Sandiford <richard.sandif...@arm.com>
wrote:

> Christophe Lyon <christophe.lyon....@gmail.com> writes:
> > On Tue, Feb 1, 2022 at 4:42 AM Richard Sandiford <
> richard.sandif...@arm.com>
> > wrote:
> >
> >> Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> >> > On Mon, Jan 31, 2022 at 7:01 PM Richard Sandiford via Gcc-patches <
> >> > gcc-patches@gcc.gnu.org> wrote:
> >> >
> >> >> Sorry for the slow response, was out last week.
> >> >>
> >> >> Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> >> >> > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
> >> >> > index feeee16d320..5f559f8fd93 100644
> >> >> > --- a/gcc/emit-rtl.c
> >> >> > +++ b/gcc/emit-rtl.c
> >> >> > @@ -6239,9 +6239,14 @@ init_emit_once (void)
> >> >> >
> >> >> >    /* For BImode, 1 and -1 are unsigned and signed interpretations
> >> >> >       of the same value.  */
> >> >> > -  const_tiny_rtx[0][(int) BImode] = const0_rtx;
> >> >> > -  const_tiny_rtx[1][(int) BImode] = const_true_rtx;
> >> >> > -  const_tiny_rtx[3][(int) BImode] = const_true_rtx;
> >> >> > +  for (mode = MIN_MODE_BOOL;
> >> >> > +       mode <= MAX_MODE_BOOL;
> >> >> > +       mode = (machine_mode)((int)(mode) + 1))
> >> >> > +    {
> >> >> > +      const_tiny_rtx[0][(int) mode] = const0_rtx;
> >> >> > +      const_tiny_rtx[1][(int) mode] = const_true_rtx;
> >> >> > +      const_tiny_rtx[3][(int) mode] = const_true_rtx;
> >> >> > +    }
> >> >> >
> >> >> >    for (mode = MIN_MODE_PARTIAL_INT;
> >> >> >         mode <= MAX_MODE_PARTIAL_INT;
> >> >>
> >> >> Does this do the right thing for:
> >> >>
> >> >>   gen_int_mode (-1, B2Imode)
> >> >>
> >> >> (which is used e.g. in native_decode_vector_rtx)?  It looks like it
> >> >> would give 0b01 rather than 0b11.
> >> >>
> >> >> Maybe for non-BImode we should use const1_rtx and constm1_rtx, like
> with
> >> >> MODE_INT.
> >> >>
> >> >
> >> > debug_rtx ( gen_int_mode (-1, B2Imode) says:
> >> > (const_int -1 [0xffffffffffffffff])
> >> > so that looks right?
> >>
> >> Ah, right, I forgot that the mode is unused for the small constant
> lookup.
> >> But it looks like CONSTM1_RTX (B2Imode) would be (const_int 1) instead,
> >> even though the two should be equal.
> >>
> >
> > Indeed!
> >
> > So I changed the above loop into:
> >    /* For BImode, 1 and -1 are unsigned and signed interpretations
> >      of the same value.  */
> >   for (mode = MIN_MODE_BOOL;
> >        mode <= MAX_MODE_BOOL;
> >        mode = (machine_mode)((int)(mode) + 1))
> >     {
> >       const_tiny_rtx[0][(int) mode] = const0_rtx;
> >       const_tiny_rtx[1][(int) mode] = const_true_rtx;
> > -      const_tiny_rtx[3][(int) mode] = const_true_rtx;
> > +      const_tiny_rtx[3][(int) mode] = constm1_rtx;
> >     }
> > which works, both constants are now equal and the validation still
> passes.
>
> I think we need to keep const_true_rtx for both [BImode][1] and
> [BImode][3].
> BImode is an awkward special case in that the (only) nonzero value must be
> exactly STORE_FLAG_VALUE, even if that leads to an otherwise non-canonical
> const_int representation.
>

OK, done.


>
> For the multi-bit booleans, [1] needs to be const1_rtx rather than
> const_true_rtx in case STORE_FLAG_VALUE != 1.
>
> >> >> > @@ -1679,15 +1708,25 @@ emit_class_narrowest_mode (void)
> >> >> >    print_decl ("unsigned char", "class_narrowest_mode",
> >> >> "MAX_MODE_CLASS");
> >> >> >
> >> >> >    for (c = 0; c < MAX_MODE_CLASS; c++)
> >> >> > -    /* Bleah, all this to get the comment right for
> MIN_MODE_INT.  */
> >> >> > -    tagged_printf ("MIN_%s", mode_class_names[c],
> >> >> > -                modes[c]
> >> >> > -                ? ((c != MODE_INT || modes[c]->precision != 1)
> >> >> > -                   ? modes[c]->name
> >> >> > -                   : (modes[c]->next
> >> >> > -                      ? modes[c]->next->name
> >> >> > -                      : void_mode->name))
> >> >> > -                : void_mode->name);
> >> >> > +    {
> >> >> > +      /* Bleah, all this to get the comment right for
> MIN_MODE_INT.
> >> */
> >> >> > +      const char *comment_name = void_mode->name;
> >> >> > +
> >> >> > +      if (modes[c])
> >> >> > +     if (c != MODE_INT || !modes[c]->boolean)
> >> >> > +       comment_name = modes[c]->name;
> >> >> > +     else
> >> >> > +       {
> >> >> > +         struct mode_data *m = modes[c];
> >> >> > +         while (m->boolean)
> >> >> > +           m = m->next;
> >> >> > +         if (m)
> >> >> > +           comment_name = m->name;
> >> >> > +         else
> >> >> > +           comment_name = void_mode->name;
> >> >> > +       }
> >> >>
> >> >> Have you tried bootstrapping the patch on a host of your choice?
> >> >> I would expect a warning/Werror about an ambiguous else here.
> >> >>
> >> > No I hadn't and indeed the build fails
> >> >
> >> >>
> >> >> I guess this reduces to:
> >> >>
> >> >>     struct mode_data *m = modes[c];
> >> >>     while (m && m->boolean)
> >> >>       m = m->next;
> >> >>     const char *comment_name = (m ? m : void_mode)->name;
> >> >>
> >> >> but I don't know if that's more readable.
> >> >>
> >> > but to my understanding the problem is that the ambiguous else
> >> > is the first one, and the code should read:
> >> >  if (modes[c])
> >> > +      {
> >> >         if (c != MODE_INT || !modes[c]->boolean)
> >> >           comment_name = modes[c]->name;
> >> >         else
> >> >           {
> >> >             struct mode_data *m = modes[c];
> >> >             while (m->boolean)
> >> >               m = m->next;
> >> >             if (m)
> >> >               comment_name = m->name;
> >> >             else
> >> >               comment_name = void_mode->name;
> >> >           }
> >> >  +    }
> >>
> >> Yeah.  I just meant that the alternative loop was probably simpler,
> >> as a replacement for the outer “if”.
> >>
> >> It looks like that the outer “if” is effectively a peeled iteration of
> >> the while loop in the outer “else”.  And the “c != MODE_INT” part ought
> >> to be redundant: as it stands, the boolean modes don't belong to any
> class.
> >>
> >> Ack, I have now:
> >    for (c = 0; c < MAX_MODE_CLASS; c++)
> >     {
> >       /* Bleah, all this to get the comment right for MIN_MODE_INT.  */
> >       struct mode_data *m = modes[c];
> >       while (m && m->boolean)
> >         m = m->next;
> >       const char *comment_name = (m ? m : void_mode)->name;
> >
> >       tagged_printf ("MIN_%s", mode_class_names[c], comment_name);
> >     }
> >
> >
> > Andre, any chance you tried the suggestion of:
> > ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 21)
> > ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 21)
> > ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 21)
>
> BTW: the final argument should be the length of the __simd<N>_<elt>_t
> type name (for mangling purposes).  It looks like the existing 32-bit
> and 64-bit bfloat entries also get this wrong.
>
> But as far as Andre's point goes: I think we need to construct
> a boolean type explicitly, using build_truth_vector_type_for_mode
> or truth_type_for.  Although the entries above specify the correct mode
> (V16BI, etc.), the mode is really a function of the type tree properties,
> rather than the other way round.
>
> The main thing that makes truth vector types special is that those
> types are the only ones that allow multiple elements in the same byte.
> A “normal” 16-byte vector created by build_vector_type(_for_mode)
> cannot be smaller than 16 bytes.
>
>
Thanks for the help, here is a new version of this patch, which contains
all the changes requested.

If OK, I'll rebase and commit the series.

Thanks
Christophe



> Thanks,
> Richard
>
From 1eaec2a01d1bcbb397c20d7034f85b7b85c6831d Mon Sep 17 00:00:00 2001
From: Christophe Lyon <christophe.l...@foss.st.com>
Date: Wed, 13 Oct 2021 09:16:22 +0000
Subject: [PATCH v4 07/15] arm: Implement MVE predicates as vectors of booleans

This patch implements support for vectors of booleans to support MVE
predicates, instead of HImode.  Since the ABI mandates pred16_t (aka
uint16_t) to represent predicates in intrinsics prototypes, we
introduce a new "predicate" type qualifier so that we can map relevant
builtins HImode arguments and return value to the appropriate vector
of booleans (VxBI).

We have to update test_vector_ops_duplicate, because it iterates using
an offset in bytes, where we would need to iterate in bits: we stop
iterating when we reach the end of the vector of booleans.

In addition, we have to fix the underlying definition of vectors of
booleans because ARM/MVE needs a different representation than
AArch64/SVE. With ARM/MVE the 'true' bit is duplicated over the
element size, so that a true element of V4BI is represented by
'0b1111'.  This patch updates the aarch64 definition of VNx*BI as
needed.

2022-01-13  Christophe Lyon  <christophe.l...@foss.st.com>
	Richard Sandiford  <richard.sandif...@arm.com>

	gcc/
	PR target/100757
	PR target/101325
	* config/aarch64/aarch64-modes.def (VNx16BI, VNx8BI, VNx4BI,
	VNx2BI): Update definition.
	* config/arm/arm-builtins.c (arm_init_simd_builtin_types): Add new
	simd types.
	(arm_init_builtin): Map predicate vectors arguments to HImode.
	(arm_expand_builtin_args): Move HImode predicate arguments to VxBI
	rtx. Move return value to HImode rtx.
	* config/arm/arm-builtins.h (arm_type_qualifiers): Add qualifier_predicate.
	* config/arm/arm-modes.def (B2I, B4I, V16BI, V8BI, V4BI): New modes.
	* config/arm/arm-simd-builtin-types.def (Pred1x16_t,
	Pred2x8_t,Pred4x4_t): New.
	* emit-rtl.c (init_emit_once): Handle all boolean modes.
	* genmodes.c (mode_data): Add boolean field.
	(blank_mode): Initialize it.
	(make_complex_modes): Fix handling of boolean modes.
	(make_vector_modes): Likewise.
	(VECTOR_BOOL_MODE): Use new COMPONENT parameter.
	(make_vector_bool_mode): Likewise.
	(BOOL_MODE): New.
	(make_bool_mode): New.
	(emit_insn_modes_h): Fix generation of boolean modes.
	(emit_class_narrowest_mode): Likewise.
	* machmode.def: (VECTOR_BOOL_MODE): Document new COMPONENT
	parameter.  Use new BOOL_MODE instead of FRACTIONAL_INT_MODE to
	define BImode.
	* rtx-vector-builder.c (rtx_vector_builder::find_cached_value):
	Fix handling of constm1_rtx for VECTOR_BOOL.
	* simplify-rtx.c (native_encode_rtx): Fix support for VECTOR_BOOL.
	(native_decode_vector_rtx): Likewise.
	(test_vector_ops_duplicate): Skip vec_merge test
	with vectors of booleans.
	* varasm.c (output_constant_pool_2): Likewise.

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 976bf9b42be..8f399225a80 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -47,10 +47,10 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
 
 /* Vector modes.  */
 
-VECTOR_BOOL_MODE (VNx16BI, 16, 2);
-VECTOR_BOOL_MODE (VNx8BI, 8, 2);
-VECTOR_BOOL_MODE (VNx4BI, 4, 2);
-VECTOR_BOOL_MODE (VNx2BI, 2, 2);
+VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
+VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
+VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
 
 ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
 ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 9c645722230..dd537ec1679 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -1553,11 +1553,25 @@ arm_init_simd_builtin_types (void)
       tree eltype = arm_simd_types[i].eltype;
       machine_mode mode = arm_simd_types[i].mode;
 
-      if (eltype == NULL)
+      if (eltype == NULL
+	  /* VECTOR_BOOL is not supported unless MVE is activated, this would
+	     make build_truth_vector_type_for_mode crash.  */
+	  && ((GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
+	      ||!TARGET_HAVE_MVE))
 	continue;
       if (arm_simd_types[i].itype == NULL)
 	{
-	  tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
+	  tree type;
+	  if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+	    {
+	      /* Handle MVE predicates: they are internally stored as 16 bits,
+		 but are used as vectors of 1, 2 or 4-bit elements.  */
+	      type = build_truth_vector_type_for_mode (GET_MODE_NUNITS (mode), mode);
+	      eltype = TREE_TYPE (type);
+	    }
+	  else
+	    type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
+
 	  type = build_distinct_type_copy (type);
 	  SET_TYPE_STRUCTURAL_EQUALITY (type);
 
@@ -1695,6 +1709,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
       if (qualifiers & qualifier_map_mode)
 	op_mode = d->mode;
 
+      /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
+	 short.  */
+      if (qualifiers & qualifier_predicate)
+	op_mode = HImode;
+
       /* For pointers, we want a pointer to the basic type
 	 of the vector.  */
       if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
@@ -2939,6 +2958,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
 	    case ARG_BUILTIN_COPY_TO_REG:
 	      if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
 		op[argc] = convert_memory_address (Pmode, op[argc]);
+
+	      /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates.  */
+	      if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
+		op[argc] = gen_lowpart (mode[argc], op[argc]);
+
 	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
 	      if (!(*insn_data[icode].operand[opno].predicate)
 		  (op[argc], mode[argc]))
@@ -3144,6 +3168,13 @@ constant_arg:
   else
     emit_insn (insn);
 
+  if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
+    {
+      rtx HItarget = gen_reg_rtx (HImode);
+      emit_move_insn (HItarget, gen_lowpart (HImode, target));
+      return HItarget;
+    }
+
   return target;
 }
 
diff --git a/gcc/config/arm/arm-builtins.h b/gcc/config/arm/arm-builtins.h
index e5130d6d286..a8ef8aef82d 100644
--- a/gcc/config/arm/arm-builtins.h
+++ b/gcc/config/arm/arm-builtins.h
@@ -84,7 +84,9 @@ enum arm_type_qualifiers
   qualifier_lane_pair_index = 0x1000,
   /* Lane indices selected in quadtuplets - must be within range of previous
      argument = a vector.  */
-  qualifier_lane_quadtup_index = 0x2000
+  qualifier_lane_quadtup_index = 0x2000,
+  /* MVE vector predicates.  */
+  qualifier_predicate = 0x4000
 };
 
 struct arm_simd_type_info
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index de689c8b45e..9ed0cd042c5 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -84,6 +84,14 @@ VECTOR_MODE (FLOAT, BF, 2);   /*                 V2BF.  */
 VECTOR_MODE (FLOAT, BF, 4);   /*		 V4BF.  */
 VECTOR_MODE (FLOAT, BF, 8);   /*		 V8BF.  */
 
+/* Predicates for MVE.  */
+BOOL_MODE (B2I, 2, 1);
+BOOL_MODE (B4I, 4, 1);
+
+VECTOR_BOOL_MODE (V16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (V8BI, 8, B2I, 2);
+VECTOR_BOOL_MODE (V4BI, 4, B4I, 2);
+
 /* Fraction and accumulator vector modes.  */
 VECTOR_MODES (FRACT, 4);      /* V4QQ  V2HQ */
 VECTOR_MODES (UFRACT, 4);     /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index 6ba6f211531..d1d6416dad1 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
   ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
   ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
   ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+  ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16)
+  ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15)
+  ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15)
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index feeee16d320..5bf7d37cfa6 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -6239,9 +6239,22 @@ init_emit_once (void)
 
   /* For BImode, 1 and -1 are unsigned and signed interpretations
      of the same value.  */
-  const_tiny_rtx[0][(int) BImode] = const0_rtx;
-  const_tiny_rtx[1][(int) BImode] = const_true_rtx;
-  const_tiny_rtx[3][(int) BImode] = const_true_rtx;
+  for (mode = MIN_MODE_BOOL;
+       mode <= MAX_MODE_BOOL;
+       mode = (machine_mode)((int)(mode) + 1))
+    {
+      const_tiny_rtx[0][(int) mode] = const0_rtx;
+      if (mode == BImode)
+	{
+	  const_tiny_rtx[1][(int) mode] = const_true_rtx;
+	  const_tiny_rtx[3][(int) mode] = const_true_rtx;
+	}
+      else
+	{
+	  const_tiny_rtx[1][(int) mode] = const1_rtx;
+	  const_tiny_rtx[3][(int) mode] = constm1_rtx;
+	}
+    }
 
   for (mode = MIN_MODE_PARTIAL_INT;
        mode <= MAX_MODE_PARTIAL_INT;
@@ -6260,13 +6273,16 @@ init_emit_once (void)
       const_tiny_rtx[0][(int) mode] = gen_rtx_CONCAT (mode, inner, inner);
     }
 
-  /* As for BImode, "all 1" and "all -1" are unsigned and signed
-     interpretations of the same value.  */
   FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_BOOL)
     {
       const_tiny_rtx[0][(int) mode] = gen_const_vector (mode, 0);
       const_tiny_rtx[3][(int) mode] = gen_const_vector (mode, 3);
-      const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
+      if (GET_MODE_INNER (mode) == BImode)
+	/* As for BImode, "all 1" and "all -1" are unsigned and signed
+	   interpretations of the same value.  */
+	const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
+      else
+	const_tiny_rtx[1][(int) mode] = gen_const_vector (mode, 1);
     }
 
   FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
diff --git a/gcc/genmodes.c b/gcc/genmodes.c
index 6001b854547..5881abd846c 100644
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -78,6 +78,7 @@ struct mode_data
   bool need_bytesize_adj;	/* true if this mode needs dynamic size
 				   adjustment */
   unsigned int int_n;		/* If nonzero, then __int<INT_N> will be defined */
+  bool boolean;
 };
 
 static struct mode_data *modes[MAX_MODE_CLASS];
@@ -88,7 +89,8 @@ static const struct mode_data blank_mode = {
   0, "<unknown>", MAX_MODE_CLASS,
   0, -1U, -1U, -1U, -1U,
   0, 0, 0, 0, 0, 0,
-  "<unknown>", 0, 0, 0, 0, false, false, 0
+  "<unknown>", 0, 0, 0, 0, false, false, 0,
+  false
 };
 
 static htab_t modes_by_name;
@@ -456,7 +458,7 @@ make_complex_modes (enum mode_class cl,
       size_t m_len;
 
       /* Skip BImode.  FIXME: BImode probably shouldn't be MODE_INT.  */
-      if (m->precision == 1)
+      if (m->boolean)
 	continue;
 
       m_len = strlen (m->name);
@@ -528,7 +530,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
 	 not be necessary.  */
       if (cl == MODE_FLOAT && m->bytesize == 1)
 	continue;
-      if (cl == MODE_INT && m->precision == 1)
+      if (m->boolean)
 	continue;
 
       if ((size_t) snprintf (buf, sizeof buf, "%s%u%s", prefix,
@@ -548,17 +550,18 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
 
 /* Create a vector of booleans called NAME with COUNT elements and
    BYTESIZE bytes in total.  */
-#define VECTOR_BOOL_MODE(NAME, COUNT, BYTESIZE) \
-  make_vector_bool_mode (#NAME, COUNT, BYTESIZE, __FILE__, __LINE__)
+#define VECTOR_BOOL_MODE(NAME, COUNT, COMPONENT, BYTESIZE)		\
+  make_vector_bool_mode (#NAME, COUNT, #COMPONENT, BYTESIZE,		\
+			 __FILE__, __LINE__)
 static void ATTRIBUTE_UNUSED
 make_vector_bool_mode (const char *name, unsigned int count,
-		       unsigned int bytesize, const char *file,
-		       unsigned int line)
+		       const char *component, unsigned int bytesize,
+		       const char *file, unsigned int line)
 {
-  struct mode_data *m = find_mode ("BI");
+  struct mode_data *m = find_mode (component);
   if (!m)
     {
-      error ("%s:%d: no mode \"BI\"", file, line);
+      error ("%s:%d: no mode \"%s\"", file, line, component);
       return;
     }
 
@@ -596,6 +599,20 @@ make_int_mode (const char *name,
   m->precision = precision;
 }
 
+#define BOOL_MODE(N, B, Y) \
+  make_bool_mode (#N, B, Y, __FILE__, __LINE__)
+
+static void
+make_bool_mode (const char *name,
+		unsigned int precision, unsigned int bytesize,
+		const char *file, unsigned int line)
+{
+  struct mode_data *m = new_mode (MODE_INT, name, file, line);
+  m->bytesize = bytesize;
+  m->precision = precision;
+  m->boolean = true;
+}
+
 #define OPAQUE_MODE(N, B)			\
   make_opaque_mode (#N, -1U, B, __FILE__, __LINE__)
 
@@ -1298,9 +1315,21 @@ enum machine_mode\n{");
       /* Don't use BImode for MIN_MODE_INT, since otherwise the middle
 	 end will try to use it for bitfields in structures and the
 	 like, which we do not want.  Only the target md file should
-	 generate BImode widgets.  */
-      if (first && first->precision == 1 && c == MODE_INT)
-	first = first->next;
+	 generate BImode widgets.  Since some targets such as ARM/MVE
+	 define boolean modes with multiple bits, handle those too.  */
+      if (first && first->boolean)
+	{
+	  struct mode_data *last_bool = first;
+	  printf ("  MIN_MODE_BOOL = E_%smode,\n", first->name);
+
+	  while (first && first->boolean)
+	    {
+	      last_bool = first;
+	      first = first->next;
+	    }
+
+	  printf ("  MAX_MODE_BOOL = E_%smode,\n\n", last_bool->name);
+	}
 
       if (first && last)
 	printf ("  MIN_%s = E_%smode,\n  MAX_%s = E_%smode,\n\n",
@@ -1679,15 +1708,15 @@ emit_class_narrowest_mode (void)
   print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
 
   for (c = 0; c < MAX_MODE_CLASS; c++)
-    /* Bleah, all this to get the comment right for MIN_MODE_INT.  */
-    tagged_printf ("MIN_%s", mode_class_names[c],
-		   modes[c]
-		   ? ((c != MODE_INT || modes[c]->precision != 1)
-		      ? modes[c]->name
-		      : (modes[c]->next
-			 ? modes[c]->next->name
-			 : void_mode->name))
-		   : void_mode->name);
+    {
+      /* Bleah, all this to get the comment right for MIN_MODE_INT.  */
+      struct mode_data *m = modes[c];
+      while (m && m->boolean)
+	m = m->next;
+      const char *comment_name = (m ? m : void_mode)->name;
+
+      tagged_printf ("MIN_%s", mode_class_names[c], comment_name);
+    }
 
   print_closer ();
 }
diff --git a/gcc/machmode.def b/gcc/machmode.def
index 866a2082d01..533cf6ab4b2 100644
--- a/gcc/machmode.def
+++ b/gcc/machmode.def
@@ -146,12 +146,13 @@ along with GCC; see the file COPYING3.  If not see
 	Like VECTOR_MODES, but start the mode names with PREFIX instead
 	of the usual "V".
 
-     VECTOR_BOOL_MODE (NAME, COUNT, BYTESIZE)
+     VECTOR_BOOL_MODE (NAME, COUNT, COMPONENT, BYTESIZE)
         Create a vector mode called NAME that contains COUNT boolean
         elements and occupies BYTESIZE bytes in total.  Each boolean
-        element occupies (COUNT * BITS_PER_UNIT) / BYTESIZE bits, with
-        the element at index 0 occupying the lsb of the first byte in
-        memory.  Only the lowest bit of each element is significant.
+        element is of COMPONENT type and occupies (COUNT * BITS_PER_UNIT) /
+        BYTESIZE bits, with the element at index 0 occupying the lsb of the
+        first byte in memory.  Only the lowest bit of each element is
+        significant.
 
      OPAQUE_MODE (NAME, BYTESIZE)
         Create an opaque mode called NAME that is BYTESIZE bytes wide.
@@ -196,7 +197,7 @@ RANDOM_MODE (VOID);
 RANDOM_MODE (BLK);
 
 /* Single bit mode used for booleans.  */
-FRACTIONAL_INT_MODE (BI, 1, 1);
+BOOL_MODE (BI, 1, 1);
 
 /* Basic integer modes.  We go up to TI in generic code (128 bits).
    TImode is needed here because the some front ends now genericly
diff --git a/gcc/rtx-vector-builder.c b/gcc/rtx-vector-builder.c
index e36aba010a0..55ffe0d5a76 100644
--- a/gcc/rtx-vector-builder.c
+++ b/gcc/rtx-vector-builder.c
@@ -90,8 +90,10 @@ rtx_vector_builder::find_cached_value ()
 
   if (GET_MODE_CLASS (m_mode) == MODE_VECTOR_BOOL)
     {
-      if (elt == const1_rtx || elt == constm1_rtx)
+      if (elt == const1_rtx)
 	return CONST1_RTX (m_mode);
+      else if (elt == constm1_rtx)
+	return CONSTM1_RTX (m_mode);
       else if (elt == const0_rtx)
 	return CONST0_RTX (m_mode);
       else
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index c36c825f958..532537ea48d 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -6876,12 +6876,13 @@ native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
 	  /* This is the only case in which elements can be smaller than
 	     a byte.  */
 	  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+	  auto mask = GET_MODE_MASK (GET_MODE_INNER (mode));
 	  for (unsigned int i = 0; i < num_bytes; ++i)
 	    {
 	      target_unit value = 0;
 	      for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
 		{
-		  value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
+		  value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
 		  elt += 1;
 		}
 	      bytes.quick_push (value);
@@ -7025,9 +7026,8 @@ native_decode_vector_rtx (machine_mode mode, const vec<target_unit> &bytes,
 	  unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
 	  unsigned int byte_index = bit_index / BITS_PER_UNIT;
 	  unsigned int lsb = bit_index % BITS_PER_UNIT;
-	  builder.quick_push (bytes[byte_index] & (1 << lsb)
-			      ? CONST1_RTX (BImode)
-			      : CONST0_RTX (BImode));
+	  unsigned int value = bytes[byte_index] >> lsb;
+	  builder.quick_push (gen_int_mode (value, GET_MODE_INNER (mode)));
 	}
     }
   else
@@ -7994,17 +7994,23 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
 						    duplicate, last_par));
 
       /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE.  */
-      rtx vector_reg = make_test_reg (mode);
-      for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+      /* Skip this test for vectors of booleans, because offset is in bytes,
+	 while vec_merge indices are in elements (usually bits).  */
+      if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
 	{
-	  if (i >= HOST_BITS_PER_WIDE_INT)
-	    break;
-	  rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
-	  rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
-	  poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
-	  ASSERT_RTX_EQ (scalar_reg,
-			 simplify_gen_subreg (inner_mode, vm,
-					      mode, offset));
+	  rtx vector_reg = make_test_reg (mode);
+	  for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+	    {
+	      if (i >= HOST_BITS_PER_WIDE_INT)
+		break;
+	      rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
+	      rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+	      poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+
+	      ASSERT_RTX_EQ (scalar_reg,
+			     simplify_gen_subreg (inner_mode, vm,
+						  mode, offset));
+	    }
 	}
     }
 
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 76574be191f..5f59b6ace15 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -4085,6 +4085,7 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
 	unsigned int elt_bits = GET_MODE_BITSIZE (mode) / nelts;
 	unsigned int int_bits = MAX (elt_bits, BITS_PER_UNIT);
 	scalar_int_mode int_mode = int_mode_for_size (int_bits, 0).require ();
+	unsigned int mask = GET_MODE_MASK (GET_MODE_INNER (mode));
 
 	/* Build the constant up one integer at a time.  */
 	unsigned int elts_per_int = int_bits / elt_bits;
@@ -4093,8 +4094,10 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
 	    unsigned HOST_WIDE_INT value = 0;
 	    unsigned int limit = MIN (nelts - i, elts_per_int);
 	    for (unsigned int j = 0; j < limit; ++j)
-	      if (INTVAL (CONST_VECTOR_ELT (x, i + j)) != 0)
-		value |= 1 << (j * elt_bits);
+	    {
+	      auto elt = INTVAL (CONST_VECTOR_ELT (x, i + j));
+	      value |= (elt & mask) << (j * elt_bits);
+	    }
 	    output_constant_pool_2 (int_mode, gen_int_mode (value, int_mode),
 				    i != 0 ? MIN (align, int_bits) : align);
 	  }
-- 
2.25.1

Reply via email to