Hi!

On Thu, Jun 18, 2020 at 03:44:04PM -0500, Peter Bergner wrote:
> This patch adds the new -mmma option as well as the initial MMA support,
> which includes the target specific __vector_pair and __vector_quad types,
> the POImode and PXImode partial integer modes they are mapped to, and their
> associated  move patterns.  Support for the restrictions on the registers
> these modes can be assigned to as also been added.

>       (rs6000_builtin_mask_calculate): Add support for RS6000_BTM_MMA
>       and RS6000_BTM_FUTURE.
The latter is already there?

>       * config/rs6000/rs6000.md (define_attr "isa"): Add mma.

Is this ever useful?  Please leave it out if not.  The "isa" things
are only for when some insn alternatives are available only one some
configurations and not others (not for when the whole pattern is not
valid).

Maybe a later patch uses this?

>       (define_mode_iterator RELOAD): Add POI and PXI.

Why POI and PXI, but not OI and XI?

>       Include mma.md.

That looks to be about RELOAD, the way it is placed.  Maybe put it as
the very first thing for this file, in the changelog?

> +;; cause byte swapping issues on litte-endian systems.  We don't need
> +;; the XImode and OImode move patterns for actual code generation,
> +;; therefor, we define the XImode and OImode move patterns, but we
> +;; disable their use with a "false" condition flag.

"therefore".

> +;; Define a disabled OImode move pattern, so we can use POImode.
> +(define_expand "movoi"
> +  [(set (match_operand:OI 0 "nonimmediate_operand")
> +     (match_operand:OI 1 "input_operand"))]
> +  "0"
> +{
> +  gcc_unreachable ();
> +})

So dirty, I love it :-)

> +(define_insn_and_split "*movpoi"
> +  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
> +     (match_operand:POI 1 "input_operand"        "m,wa,wa"))]

Don't use tabs other than at the start of the line, please (or *maybe*
in tables).

> +;; Special pattern to prevent DSE from generating an internal error if it
> +;; notices a structure copy that it wants to eliminate.  This generates 
> pretty
> +;; bad code, but at least it doesn't die.
> +(define_insn_and_split "truncpoidi2"

Could you say *why*/*how* it prevents the ICE here?

> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +     (truncate:DI (match_operand:POI 1 "gpc_reg_operand" "wa")))]
> +  "TARGET_MMA"
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 0)
> +     (vec_select:DI (match_dup 2)
> +                    (parallel [(match_dup 3)])))]
> +{
> +  unsigned r = reg_or_subregno (operands[1]) + !BYTES_BIG_ENDIAN;

Don't + booleans please (use ?: instead).

> +  operands[2] = gen_rtx_REG (V2DImode, r);
> +  operands[3] = BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx;
> +})

So maybe just do an  if (BYTES_BIG_ENDIAN)  even, the arms simplify a
bit then.

> +;; Vector quad support.  PXImode is only defined for floating point 
> registers.

Rephrase this?  A mode is defined without referring to registers at
all ;-)  "PXImode can only live in FPRs", something like that?

> +  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
> +     registers.  We need to allow OImode to have the same registers as 
> POImode,
> +     even though we do not enable the move pattern for OImode.  */
> +  if (mode == POImode || mode == OImode)
> +    return (TARGET_MMA && VSX_REGNO_P (regno)
> +         && (regno & 1) == 0);

Put it all one one line?

> +  /* MMA accumulator modes need FPR registers divisible by 4.  We need to 
> allow
> +     XImode to have the same registers as PXImode, even though we do not 
> enable
> +     the move pattern for XImode.  */
> +  if (mode == PXImode || mode == XImode)
> +    return (TARGET_MMA && FP_REGNO_P (regno)
> +         && (regno & 3) == 0);

Likewise.

Why are OImode and XImode handled here?

>  static bool
>  rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
>  {
> -  if (mode1 == PTImode)
> -    return mode2 == PTImode;
> -  if (mode2 == PTImode)
> +  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode)
> +    return mode1 == mode2;
> +  if (mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
>      return false;

You can just do
  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
    return mode1 == mode2;

> @@ -2206,6 +2227,8 @@ rs6000_debug_reg_global (void)
>      SDmode,
>      DDmode,
>      TDmode,
> +    V2SImode,
> +    V2SFmode,

Did the changelog mention these?  If it is a bugfix, could it need a
backport?  Do it a separate patch then?

Well, it is debug info only, so not really interesting, but heh.

> @@ -2220,9 +2243,14 @@ rs6000_debug_reg_global (void)
>      V2DFmode,
>      V8SFmode,
>      V4DFmode,
> +    OImode,
> +    XImode,
> +    POImode,
> +    PXImode,
>      CCmode,
>      CCUNSmode,
>      CCEQmode,
> +    CCFPmode,
>    };

Same for the CCFP one here.

> +  /* Add support for vector pairs and vector quad registers.  */
> +  if (TARGET_MMA)
> +    {
> +      for (m = 0; m < NUM_MACHINE_MODES; ++m)
> +     if (m == POImode || m == PXImode)
> +       {
> +         rs6000_vector_unit[m] = VECTOR_NONE;
> +         rs6000_vector_mem[m] = VECTOR_VSX;
> +         rs6000_vector_align[m] = (m == POImode) ? 256 : 512;
> +       }
> +    }

This is just

  /* Add support for vector pairs and vector quad registers.  */
  if (TARGET_MMA)
    {
      rs6000_vector_unit[POImode] = VECTOR_NONE;
      rs6000_vector_mem[POImode] = VECTOR_VSX;
      rs6000_vector_align[POImode] = 256;

      rs6000_vector_unit[PXImode] = VECTOR_NONE;
      rs6000_vector_mem[PXImode] = VECTOR_VSX;
      rs6000_vector_align[PXImode] = 512;
    }

which is just not longer (even although is has that whiteline :-) )

> @@ -15793,7 +15898,23 @@ rs6000_split_multireg_move (rtx dst, rtx src)
>    reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
>    mode = GET_MODE (dst);
>    nregs = hard_regno_nregs (reg, mode);
> -  if (FP_REGNO_P (reg))
> +  /* If we have a quad vector register for MMA, and this is a load or store,
> +     see if we can use vector paired load/stores.  */
> +  if (mode == PXImode && TARGET_MMA
> +      && (MEM_P (dst) || MEM_P (src)))
> +    {
> +      reg_mode = POImode;;
> +      nregs /= hard_regno_nregs (reg, reg_mode);
> +    }

(doubled semicolon)

So nregs is always 2?  Maybe it is better to just assert that here then?

> @@ -19249,6 +19416,9 @@ rs6000_mangle_type (const_tree type)
>    if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
>      return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
>  
> +  if (type == vector_pair_type_node) return "u13__vector_pair";
> +  if (type == vector_quad_type_node) return "u13__vector_quad";

Line breaks?

>  /* No data type wants to be aligned rounder than this.  */
> -#define BIGGEST_ALIGNMENT 128
> +#define BIGGEST_ALIGNMENT ((TARGET_MMA) ? 512 : 128)

No silly parens around TARGET_MMA please (macros should protect
themselves, sure, but not try to protect other macros).


Okay for trunk modulo the above.  Thanks!  This was much less painful
than I feared.  Well, maybe it is all in the other patches, I'll get to
those tomorrow ;-)


Segher

Reply via email to