Re: [PATCH 0/6 ver 4] ] Permute Class Operations

will schmidt via Gcc-patches Thu, 09 Jul 2020 08:45:16 -0700

On Wed, 2020-07-08 at 12:59 -0700, Carl Love wrote:
> [PATCH 2/6] rs6000 Add vector insert builtin support
> 
> ------------------------------------
> V4 changes
>   Rebased on mainline.  Changed FUTURE to P10 as needed.
> 
> ------------------------------------
> V3 changes
> 
>   Replace spaces with tabs in ChangeLog
>   Ditto in gcc/config/rs6000/vsx.md.
>   Updated description for vec_insertl() builtin.
>   Cleaned up vec_insert description.
> 
> -----------------------------------------------------------------
> v2 changes
> 
> Fix change log entry for config/rs6000/altivec.h
> 
> Fix change log entry for config/rs6000/rs6000-builtin.def
> 
> Fix change log entry for config/rs6000/rs6000-call.c
> 
> vsx.md: Fixed if (BYTES_BIG_ENDIAN) else statements.
> Porting error from pu branch.
> 
> ---------------------------------------------------------------
> GCC maintainers:
> 
> This patch adds support for vec_insertl and vec_inserth builtins.
> 
> The patch has been compiled and tested on
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
> 
> and mambo with no regression errors.
> 
> Please let me know if this patch is acceptable for the mainline branch.
> 
> Thanks.
> 
>                          Carl Love
> 
> --------------------------------------------------------------
> gcc/ChangeLog
> 
> 2020-07-02  Carl Love  <c...@us.ibm.com>
> 
>       * config/rs6000/altivec.h (vec_insertl, vec_inserth): New defines.
>       * config/rs6000/rs6000-builtin.def (VINSERTGPRBL, VINSERTGPRHL,
>       VINSERTGPRWL, VINSERTGPRDL, VINSERTVPRBL, VINSERTVPRHL, VINSERTVPRWL,
>       VINSERTGPRBR, VINSERTGPRHR, VINSERTGPRWR, VINSERTGPRDR, VINSERTVPRBR,
>       VINSERTVPRHR, VINSERTVPRWR): New builtins.
>       (INSERTL, INSERTH): New builtins.
>       * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_INSERTL,
>       P10_BUILTIN_VEC_INSERTH): New overloaded definitions.
>       (P10_BUILTIN_VINSERTGPRBL, P10_BUILTIN_VINSERTGPRHL,
>       P10_BUILTIN_VINSERTGPRWL, P10_BUILTIN_VINSERTGPRDL,
>       P10_BUILTIN_VINSERTVPRBL, P10_BUILTIN_VINSERTVPRHL,
>       P10_BUILTIN_VINSERTVPRWL): Add case entries.
>       * config/rs6000/vsx.md (define_c_enum): Add UNSPEC_INSERTL,
>       UNSPEC_INSERTR.
>       (define_expand): Add vinsertvl_<mode>, vinsertvr_<mode>,
>       vinsertgl_<mode>, vinsertgr_<mode>, mode is VI2.
>       (define_ins): vinsertvl_internal_<mode>, vinsertvr_internal_<mode>,
>       vinsertgl_internal_<mode>, vinsertgr_internal_<mode>, mode VEC_I.
>       * doc/extend.texi: Add documentation for vec_insertl, vec_inserth.
>


ok

> gcc/testsuite/ChangeLog
> 
> 2020-07-02  Carl Love  <c...@us.ibm.com>
> 
>       * gcc.target/powerpc/vec-insert-word-runnable.c: New test case.
> ---
>  gcc/config/rs6000/altivec.h                   |   2 +
>  gcc/config/rs6000/rs6000-builtin.def          |  18 +
>  gcc/config/rs6000/rs6000-call.c               |  51 +++
>  gcc/config/rs6000/vsx.md                      | 110 ++++++
>  gcc/doc/extend.texi                           |  71 ++++
>  .../powerpc/vec-insert-word-runnable.c        | 345 ++++++++++++++++++
>  6 files changed, 597 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index bb1524f4a67..0563853c03f 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -699,6 +699,8 @@ __altivec_scalar_pred(vec_any_nle,
>  /* Overloaded built-in functions for ISA 3.1.  */
>  #define vec_extractl(a, b, c)        __builtin_vec_extractl (a, b, c)
>  #define vec_extracth(a, b, c)        __builtin_vec_extracth (a, b, c)
> +#define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
> +#define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
> 
>  #define vec_gnb(a, b)        __builtin_vec_gnb (a, b)
>  #define vec_clrl(a, b)       __builtin_vec_clrl (a, b)
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index 363656ec05c..e73d144c1cc 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2708,6 +2708,22 @@ BU_P10V_3 (VEXTRACTHR, "vextduhvhx", CONST, 
> vextractrv8hi)
>  BU_P10V_3 (VEXTRACTWR, "vextduwvhx", CONST, vextractrv4si)
>  BU_P10V_3 (VEXTRACTDR, "vextddvhx", CONST, vextractrv2di)
> 
> +BU_P10V_3 (VINSERTGPRBL, "vinsgubvlx", CONST, vinsertgl_v16qi)
> +BU_P10V_3 (VINSERTGPRHL, "vinsguhvlx", CONST, vinsertgl_v8hi)
> +BU_P10V_3 (VINSERTGPRWL, "vinsguwvlx", CONST, vinsertgl_v4si)
> +BU_P10V_3 (VINSERTGPRDL, "vinsgudvlx", CONST, vinsertgl_v2di)
> +BU_P10V_3 (VINSERTVPRBL, "vinsvubvlx", CONST, vinsertvl_v16qi)
> +BU_P10V_3 (VINSERTVPRHL, "vinsvuhvlx", CONST, vinsertvl_v8hi)
> +BU_P10V_3 (VINSERTVPRWL, "vinsvuwvlx", CONST, vinsertvl_v4si)
> +
> +BU_P10V_3 (VINSERTGPRBR, "vinsgubvrx", CONST, vinsertgr_v16qi)
> +BU_P10V_3 (VINSERTGPRHR, "vinsguhvrx", CONST, vinsertgr_v8hi)
> +BU_P10V_3 (VINSERTGPRWR, "vinsguwvrx", CONST, vinsertgr_v4si)
> +BU_P10V_3 (VINSERTGPRDR, "vinsgudvrx", CONST, vinsertgr_v2di)
> +BU_P10V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
> +BU_P10V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
> +BU_P10V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
> +
>  BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
>  BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
>  BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
> @@ -2727,6 +2743,8 @@ BU_P10_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm")
> 
>  BU_P10_OVERLOAD_3 (EXTRACTL, "extractl")
>  BU_P10_OVERLOAD_3 (EXTRACTH, "extracth")
> +BU_P10_OVERLOAD_3 (INSERTL, "insertl")
> +BU_P10_OVERLOAD_3 (INSERTH, "inserth")
> 
>  BU_P10_OVERLOAD_1 (VSTRIR, "strir")
>  BU_P10_OVERLOAD_1 (VSTRIL, "stril")

ok

> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index d3cf2de8878..820b361c0f6 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5576,6 +5576,28 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
> 
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTGPRBL,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTGPRHL,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTGPRWL,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTGPRDL,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
> + { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTVPRBL,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTVPRHL,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
> +  { P10_BUILTIN_VEC_INSERTL, P10_BUILTIN_VINSERTVPRWL,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
> +
>    { P10_BUILTIN_VEC_EXTRACTH, P10_BUILTIN_VEXTRACTBR,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
>      RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> @@ -5589,6 +5611,28 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>      RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTQI },
> 
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTGPRBR,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTGPRHR,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTHI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTGPRWR,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTGPRDR,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTDI,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_UINTSI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTVPRBR,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
> +    RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTVPRHR,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
> +    RS6000_BTI_unsigned_V8HI, RS6000_BTI_UINTQI },
> +  { P10_BUILTIN_VEC_INSERTH, P10_BUILTIN_VINSERTVPRWR,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
> +
>    { P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL,
>      RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
>    { P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL,
> @@ -13788,6 +13832,13 @@ builtin_function_type (machine_mode mode_ret, 
> machine_mode mode_arg0,
>      case P10_BUILTIN_VEXTRACTHR:
>      case P10_BUILTIN_VEXTRACTWR:
>      case P10_BUILTIN_VEXTRACTDR:
> +    case P10_BUILTIN_VINSERTGPRBL:
> +    case P10_BUILTIN_VINSERTGPRHL:
> +    case P10_BUILTIN_VINSERTGPRWL:
> +    case P10_BUILTIN_VINSERTGPRDL:
> +    case P10_BUILTIN_VINSERTVPRBL:
> +    case P10_BUILTIN_VINSERTVPRHL:
> +    case P10_BUILTIN_VINSERTVPRWL:
>        h.uns_p[0] = 1;
>        h.uns_p[1] = 1;
>        h.uns_p[2] = 1;

ok

> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index e9f89d43b3f..e9d45d1dcfd 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -349,6 +349,8 @@
>     UNSPEC_XXGENPCV
>     UNSPEC_EXTRACTL
>     UNSPEC_EXTRACTR
> +   UNSPEC_INSERTL
> +   UNSPEC_INSERTR
>    ])
> 
>  (define_int_iterator XVCVBF16        [UNSPEC_VSX_XVCVSPBF16
> @@ -3865,6 +3867,114 @@
>    "vext<du_or_d><wd>vrx %0,%1,%2,%3"
>    [(set_attr "type" "vecsimple")])
> 
> +(define_expand "vinsertvl_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +     (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
> +                  (match_operand:VI2 2 "altivec_register_operand")
> +                  (match_operand:SI 3 "register_operand" "r")]
> +                 UNSPEC_INSERTL))]
> +  "TARGET_POWER10"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   else
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   DONE;
> +})
> +
> +(define_insn "vinsertvl_internal_<mode>"
> +  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +     (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +                    (match_operand:VEC_I 2 "altivec_register_operand" "v")
> +                    (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +                   UNSPEC_INSERTL))]
> +  "TARGET_POWER10"
> +  "vins<wd>vlx %0,%1,%2"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertvr_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +     (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
> +                  (match_operand:VI2 2 "altivec_register_operand")
> +                  (match_operand:SI 3 "register_operand" "r")]
> +                 UNSPEC_INSERTR))]
> +  "TARGET_POWER10"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   else
> +     emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
> +                                               operands[1], operands[2]));
> +   DONE;
> +})
> +
> +(define_insn "vinsertvr_internal_<mode>"
> +  [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +     (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +                    (match_operand:VEC_I 2 "altivec_register_operand" "v")
> +                    (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +                   UNSPEC_INSERTR))]
> +  "TARGET_POWER10"
> +  "vins<wd>vrx %0,%1,%2"
> +  [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertgl_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +     (unspec:VI2 [(match_operand:SI 1 "register_operand")
> +                  (match_operand:VI2 2 "altivec_register_operand")
> +                  (match_operand:SI 3 "register_operand")]
> +                 UNSPEC_INSERTL))]
> +  "TARGET_POWER10"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  else
> +    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  DONE;
> + })
> +
> +(define_insn "vinsertgl_internal_<mode>"
> + [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +       (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +                   (match_operand:SI 2 "register_operand" "r")
> +                   (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +                  UNSPEC_INSERTL))]
> + "TARGET_POWER10"
> + "vins<wd>lx %0,%1,%2"
> + [(set_attr "type" "vecsimple")])
> +
> +(define_expand "vinsertgr_<mode>"
> +  [(set (match_operand:VI2 0 "altivec_register_operand")
> +     (unspec:VI2 [(match_operand:SI 1 "register_operand")
> +                  (match_operand:VI2 2 "altivec_register_operand")
> +                  (match_operand:SI 3 "register_operand")]
> +                 UNSPEC_INSERTR))]
> +  "TARGET_POWER10"
> +{
> +  if (BYTES_BIG_ENDIAN)
> +    emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  else
> +    emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
> +                                            operands[1], operands[2]));
> +  DONE;
> + })
> +
> +(define_insn "vinsertgr_internal_<mode>"
> + [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
> +   (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
> +               (match_operand:SI 2 "register_operand" "r")
> +               (match_operand:VEC_I 3 "altivec_register_operand" "0")]
> +              UNSPEC_INSERTR))]
> + "TARGET_POWER10"
> + "vins<wd>rx %0,%1,%2"
> + [(set_attr "type" "vecsimple")])
> +
>  ;; VSX_EXTRACT optimizations
>  ;; Optimize double d = (double) vec_extract (vi, <n>)
>  ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 0e65d542587..e643346a160 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -20991,6 +20991,77 @@ Perform a vector parallel bits deposit operation, as 
> if implemented by
>  the @code{vpdepd} instruction.
>  @findex vec_pdep
> 
> +Vector Insert
> +
> +@smallexample
> +@exdent vector unsigned char
> +@exdent vec_insertl (unsigned char, vector unsigned char, unsigned int);
> +@exdent vector unsigned short
> +@exdent vec_insertl (unsigned short, vector unsigned short, unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_insertl (unsigned int, vector unsigned int, unsigned int);
> +@exdent vector unsigned long long
> +@exdent vec_insertl (unsigned long long, vector unsigned long long,
> +unsigned int);
> +@exdent vector unsigned char
> +@exdent vec_insertl (vector unsigned char, vector unsigned char, unsigned 
> int;
> +@exdent vector unsigned short
> +@exdent vec_insertl (vector unsigned short, vector unsigned short,
> +unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_insertl (vector unsigned int, vector unsigned int, unsigned int);
> +@end smallexample
> +
> +Let src be the first argument, when the first argument is a scalar, or the
> +rightmost element of the left doubleword of the first argument, when the 
> first
> +argument is a vector.  Insert the source into the destination at the position
> +given by the third argument, using natural element order in the second
> +argument.  The rest of the second argument is unchanged.  If the byte
> +index is greater than 14 for halfwords, greatere than 12 for words, or

s/greatere/greater/

> +greater than 8 for doublewords the result is undefined.   For little-endian,
> +the generated code will be semantically equivalent to vinsbrx, vinshrx,
> +or vinswrx instructions.  Similarly for big-endian it will be semantically

wrap those in @code{}

> +equivalent to vinsblx, vinshlx, vinswlx.  Note that some
> +fairly anomalous results can be generated if the byte index is not aligned
> +on an element boundary for the sort of element being inserted. This is a

s/sort/type/ ? 

> +limitation of the bi-endian vector programming model.

Not sure the limitation statement is useful for the description of the
builtin.


> +@findex vec_insertl
> +
> +@smallexample
> +@exdent vector unsigned char
> +@exdent vec_inserth (unsigned char, vector unsigned char, unsigned int);
> +@exdent vector unsigned short
> +@exdent vec_inserth (unsigned short, vector unsigned short, unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_inserth (unsigned int, vector unsigned int, unsigned int);
> +@exdent vector unsigned long long
> +@exdent vec_inserth (unsigned long long, vector unsigned long long,
> +unsigned int);
> +@exdent vector unsigned char
> +@exdent vec_inserth (vector unsigned char, vector unsigned char, unsigned 
> int);
> +@exdent vector unsigned short
> +@exdent vec_inserth (vector unsigned short, vector unsigned short,
> +unsigned int);
> +@exdent vector unsigned int
> +@exdent vec_inserth (vector unsigned int, vector unsigned int, unsigned int);
> +@end smallexample
> +
> +Let src be the first argument, when the first argument is a scalar, or the
> +rightmost element of the first argument, when the first argument is a vector.
> +Insert src into the second argument at the position identified by the third
> +argument, using opposite element order in the second argument, and leaving 
> the
> +rest of the second argument unchanged.  If the byte index is greater than 14
> +for halfwords, 12 for words, or 8 for doublewords, the intrinsic will be
> +rejected. Note that the underlying hardware instruction uses the same 
> register
> +for the second argument and the result, but this is hidden by the built-in.

If it's hidden, it probably doesn't need to be discussed here.  (A
comment on the builtin implementation would be appropriate).

> +For little-endian, the code generation will be semantically equivalent to
> +vins*lx, while for big-endian it will be semantically equivalent to vins*rx.

wrap in @code{}

> +Note that some fairly anomalous results can be generated if the byte index is
> +not aligned on an element boundary for the sort of element being inserted.
> +This is a limitation of the bi-endian vector programming model consistent 
> with
> +the limitation on vec_perm, for example.
> +@findex vec_inserth
> +
>  @smallexample
>  @exdent vector unsigned long long int
>  @exdent vec_pext (vector unsigned long long int, vector unsigned long long 
> int)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c
> new file mode 100644
> index 00000000000..8c2721aedfc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-insert-word-runnable.c


<snip>
ok.

Re: [PATCH 0/6 ver 4] ] Permute Class Operations

Reply via email to