> -----Original Message-----
> From: Alfie Richards <[email protected]>
> Sent: 09 February 2026 10:10
> To: [email protected]
> Cc: Richard Earnshaw <[email protected]>; Tamar Christina
> <[email protected]>; [email protected]; Alice Carlotti
> <[email protected]>; Alex Coplan <[email protected]>; Wilco
> Dijkstra <[email protected]>; [email protected]; Alfie
> Richards <[email protected]>
> Subject: [PATCH] aarch64: Add support for range prefetch intrinsic.
> 
> Hi all,
> 
> This patch adds support for the aarch64 range prefetch intrinsic.
> 
> Bootstrapped and reg tested for AArch64-linux-gnu.
> 
> Okay for master (in stage 1 maybe?)
> 
> KR,
> Alfie
> 
> -- >8 --
> 
> Also updates require_const_argument to always return a value in range.
> 
> gcc/ChangeLog:
> 
>       * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>       Add AARCH64_PREFETCH_PLD_RANGE and
> AARCH64_PREFETCH_PLDX_RANGE.
>       (aarch64_init_prefetch_builtins): Add initialization of
>       __pld_range and __pldx_range.
>       (require_const_argument): Update to return the minval if value
>       is out of range.
>       (aarch64_expand_prefetch_range_builtin): New function.
>       (aarch64_general_expand_builtin): Add support for
>       AARCH64_PREFETCH_PLD_RANGE and
> AARCH64_PREFETCH_PLDX_RANGE.
>       * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add
>       __ARM_PREFETCH_RANGE macro.
>       * config/aarch64/aarch64.md (unspec): Add UNSPEC_PLDX_RANGE
> and
>       UNSPEC_PLD_RANGE
>       (aarch64_rprfm): New instruction.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/acle/rprfm.c: New test.
>       * gcc.target/aarch64/acle/rprfm_error.c: New test.
> ---
>  gcc/config/aarch64/aarch64-builtins.cc        | 128 +++++++++++++++++-
>  gcc/config/aarch64/aarch64-c.cc               |   1 +
>  gcc/config/aarch64/aarch64.md                 |  14 ++
>  gcc/testsuite/gcc.target/aarch64/acle/rprfm.c | 107 +++++++++++++++
>  .../gcc.target/aarch64/acle/rprfm_error.c     |  31 +++++
>  5 files changed, 276 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> 
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc
> b/gcc/config/aarch64/aarch64-builtins.cc
> index dd74cf06ef2..39658ffab11 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -894,6 +894,8 @@ enum aarch64_builtins
>    AARCH64_WSR128,
>    AARCH64_PREFETCH_PLD,
>    AARCH64_PREFETCH_PLDX,
> +  AARCH64_PREFETCH_PLD_RANGE,
> +  AARCH64_PREFETCH_PLDX_RANGE,
>    AARCH64_PREFETCH_PLI,
>    AARCH64_PREFETCH_PLIX,
>    AARCH64_PREFETCH_PLDIR,
> @@ -2249,6 +2251,18 @@ aarch64_init_prefetch_builtins (void)
> 
>    ftype = build_function_type_list (void_type_node, cv_argtype, NULL_TREE);
>    AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldir", PLDIR);
> +
> +  ftype = build_function_type_list (void_type_node, unsigned_type_node,
> +                                 unsigned_type_node, integer_type_node,
> +                                 unsigned_type_node, integer_type_node,
> +                                 size_type_node, cv_argtype, NULL);
> +  AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldx_range", PLDX_RANGE);
> +
> +  ftype = build_function_type_list (void_type_node, unsigned_type_node,
> +                                 unsigned_type_node,
> +                                 long_long_unsigned_type_node,
> cv_argtype,
> +                                 NULL);
> +  AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pld_range", PLD_RANGE);
>  }
> 
>  /* Initialize the memory tagging extension (MTE) builtins.  */
> @@ -3650,9 +3664,13 @@ require_const_argument (tree exp, unsigned int
> argno, HOST_WIDE_INT minval,
>    auto argval = wi::to_widest (arg);
> 
>    if (argval < minval || argval > maxval)
> -    error_at (EXPR_LOCATION (exp),
> -           "argument %d must be a constant immediate "
> -           "in range [%wd,%wd]", argno + 1, minval, maxval);
> +    {
> +      error_at (EXPR_LOCATION (exp),
> +             "argument %d must be a constant immediate "
> +             "in range [%wd,%wd]",
> +             argno + 1, minval, maxval);
> +      return minval;
> +    }
> 

Heh, I thought error_at was NORETURN, but I guess it's not.
Maybe -1 is a better return value, since the argument is out of range
and the value shouldn't be used.

>    HOST_WIDE_INT retval = argval.to_shwi ();
>    return retval;
> @@ -3723,8 +3741,104 @@ aarch64_expand_prefetch_builtin (tree exp, int
> fcode)
>    maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
>  }
> 
> -/* Expand an expression EXP that calls a MEMTAG built-in FCODE
> -   with result going to TARGET.  */
> +/* Expand a prefetch range builtin EXP.  */
> +void
> +aarch64_expand_prefetch_range_builtin (tree exp, int fcode)
> +{
> +  char prfop[11];
> +  class expand_operand ops[3];
> +
> +  static const char *kind_s[] = {"PLD", "PST"};
> +  static const char *rettn_s[] = {"KEEP", "STRM"};
> +
> +  int argno = 0;
> +
> +  int kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE
> (kind_s));
> +  int rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE
> (rettn_s));
> +
> +  rtx metadata = NULL_RTX;
> +
> +  switch (fcode)
> +    {
> +    case AARCH64_PREFETCH_PLDX_RANGE:
> +      {
> +     /* length must be in [-2^21,2^21).  */
> +     int length = require_const_argument (exp, argno++, -(1 << 21), 1 <<
> 21);
> +     gcc_assert (length >= -(1 << 21) && length < (1 << 21));
> +
> +     /* count must be in [1,2^16].  */
> +     int count = require_const_argument (exp, argno++, 1, (1 << 16) + 1);
> +     gcc_assert (count >= 1 && count <= (1 << 16));
> +
> +     /* stride must be in [-2^21,2^21).  */
> +     int stride = require_const_argument (exp, argno++, -(1 << 21), 1 <<
> 21);
> +     gcc_assert (stride >= -(1 << 21) && stride < (1 << 21));
> +
> +     /* There is no requirements on reuse_distance other than to be a
> +        non-negative integer.  However it is meaningless for
> +        values less than 2^15 or greater than 2^29.  */
> +     uint64_t reuse_distance = require_const_argument (exp, argno++, 0,
> +                                                        LONG_LONG_MAX);
> +
> +     uint64_t length_bits = ((uint64_t) length) & ((1 << 22) - 1);
> +     uint64_t count_bits = ((uint64_t) count - 1) & ((1 << 16) - 1);
> +     uint64_t stride_bits = ((uint64_t) stride) & ((1 << 22) - 1);
> +
> +     uint64_t reuse_distance_bits = 0;
> +       /* If reuse distance > 512MiB or = 0 then use 0 to represent distance
> +          unknown.  */
> +     if (reuse_distance != 0 && reuse_distance <= (1ULL << 29))
> +       {
> +         /* Find the largest n such that (2 ^ (15-n)) * 32KB >= reuse
> +            distance.  */
> +         if (reuse_distance <= (1ULL << 15))
> +           reuse_distance_bits = 15;
> +         else
> +           reuse_distance_bits = __builtin_clzll (reuse_distance - 1) - 34;
> +
> +         /* Reuse distance is a 4 bit value.  */
> +         gcc_assert (reuse_distance_bits < (1 << 4));
> +       }
> +
> +     uint64_t metadata_val = length_bits
> +                             | (count_bits << 22)
> +                             | (stride_bits << 38)
> +                             | (reuse_distance_bits << 60);
> +
> +     metadata = GEN_INT (metadata_val);
> +     break;
> +      }
> +    case AARCH64_PREFETCH_PLD_RANGE:
> +      {
> +     tree metadata_arg = CALL_EXPR_ARG (exp, argno++);
> +     metadata = copy_to_mode_reg (E_DImode, expand_normal
> (metadata_arg));
> +     break;
> +      }
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  /* Any -1 id variable is to be user-supplied.  Here we fill these in and 
> run
> +     bounds checks on them.  "PLI" is used only implicitly by AARCH64_PLI &
> +     AARCH64_PLIX, never explicitly.  */
> +  rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX,
> Pmode,
> +                          EXPAND_NORMAL);
> +
> +  if (seen_error ())
> +    return;
> +
> +  sprintf (prfop, "%s%s", kind_s[kind_id], rettn_s[rettn_id]);
> +
> +  rtx const_str = rtx_alloc (CONST_STRING);
> +  PUT_CODE (const_str, CONST_STRING);
> +  XSTR (const_str, 0) = ggc_strdup (prfop);
> +
> +  create_fixed_operand (&ops[0], const_str);
> +  create_input_operand (&ops[1], metadata, E_DImode);
> +  create_address_operand (&ops[2], address);
> +  maybe_expand_insn (CODE_FOR_aarch64_rprfm, 3, ops);
> +}
> +
>  static rtx
>  aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
>  {
> @@ -4578,6 +4692,10 @@ aarch64_general_expand_builtin (unsigned int
> fcode, tree exp, rtx target,
>      case AARCH64_PREFETCH_PLDIR:
>        aarch64_expand_pldir_builtin (exp);
>        return target;
> +    case AARCH64_PREFETCH_PLD_RANGE:
> +    case AARCH64_PREFETCH_PLDX_RANGE:
> +      aarch64_expand_prefetch_range_builtin (exp, fcode);
> +      return target;
>      case AARCH64_BUILTIN_CHKFEAT:
>        {
>       rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-
> c.cc
> index f8be998da16..58fa761a9bb 100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -310,6 +310,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>                       "__ARM_FEATURE_SME2p1", pfile);
>    aarch64_def_or_undef (TARGET_FAMINMAX,
> "__ARM_FEATURE_FAMINMAX", pfile);
>    aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT",
> pfile);
> +  builtin_define ("__ARM_PREFETCH_RANGE");
> 
>    // Function multi-versioning defines
>    aarch64_def_or_undef (targetm.has_ifunc_p (),
> diff --git a/gcc/config/aarch64/aarch64.md
> b/gcc/config/aarch64/aarch64.md
> index 70a64a6c0ed..4c5485cba1d 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -393,6 +393,8 @@ (define_c_enum "unspec" [
>      UNSPEC_SYSREG_WTI
>      UNSPEC_PLDX
>      UNSPEC_PLDIR
> +    UNSPEC_PLDX_RANGE
> +    UNSPEC_PLD_RANGE
>      ;; Represents an SVE-style lane index, in which the indexing applies
>      ;; within the containing 128-bit block.
>      UNSPEC_SVE_LANE_SELECT
> @@ -1381,6 +1383,18 @@ (define_insn "aarch64_pldx"
>    [(set_attr "type" "load_4")]
>  )
> 
> +(define_insn "aarch64_rprfm"
> +  [(unspec [(match_operand 0 "" "")
> +         (match_operand:DI 1 "aarch64_prefetch_operand" "Dp")
> +         (match_operand:DI 2 "register_operand" "r")] UNSPEC_PLDX)]
> +  ""
> +  {
> +    operands[2] = gen_rtx_MEM (DImode, operands[2]);
> +    return "rprfm\\t%0, %1, %2";
> +  }
> +  [(set_attr "type" "load_4")]
> +)

I think operand 2 here should be

  (match_operand:DI 2 "memory_operand" "Q")

which should allow you to drop the last-minute conversion to MEM.

OK with those changes.

Thanks,
Tamar

> +
>  (define_insn "trap"
>    [(trap_if (const_int 1) (const_int 8))]
>    ""
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> new file mode 100644
> index 00000000000..02f0f11223b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> @@ -0,0 +1,107 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a -O1 -fno-schedule-insns" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_acle.h>
> +
> +/* Access kind specifiers.  */
> +#define PLD 0
> +#define PST 1
> +/* Retention policies.  */
> +#define KEEP 0
> +#define STRM 1
> +
> +/* This test is a bit awkward as we need to test the constants that get 
> passed
> +   into x1. This may be a bit fragile.  */
> +
> +/*
> +** pldx_range:
> +**...
> +**   mov     x1, 0
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PSTKEEP, x1, \[x0\]
> +**   rprfm   PLDSTRM, x1, \[x0\]
> +**   rprfm   PSTSTRM, x1, \[x0\]
> +**   mov     x1, 1
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 4194303
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 2097152
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 2097151
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 0
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 4194304
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 274873712640
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 576460752303423488
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 576460477425516544
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, -1152921504606846976
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, -2305843009213693952
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, -3458764513820540928
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 2305843009213693952
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 1152921504606846976
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   mov     x1, 0
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**...
> +*/
> +int pldx_range (void *a) {
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);
> +  __pldx_range (PST, KEEP, 0, 1, 0, 0, a);
> +  __pldx_range (PLD, STRM, 0, 1, 0, 0, a);
> +  __pldx_range (PST, STRM, 0, 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, 1, 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, -1, 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, -(1<<21), 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, (1<<21)-1, 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);
> +  __pldx_range (PLD, KEEP, 0, 2, 0, 0, a);
> +  __pldx_range (PLD, KEEP, 0, 65536, 0, 0, a);
> +  __pldx_range (PLD, KEEP, 0, 1, -(1<<21), 0, a);
> +  __pldx_range (PLD, KEEP, 0, 1, (1<<21)-1, 0, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 15) - 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 15, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) - 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 16, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) + 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 28, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 28) + 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) - 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 29, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) + 1, a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 30), a);
> +  __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 31), a);
> +}
> +
> +/*
> +** pld_range:
> +**...
> +**   rprfm   PLDKEEP, x1, \[x0\]
> +**   rprfm   PSTKEEP, x1, \[x0\]
> +**   rprfm   PLDSTRM, x1, \[x0\]
> +**   rprfm   PSTSTRM, x1, \[x0\]
> +**...
> +*/
> +int pld_range (void *a, uint64_t m) {
> +  __pld_range (PLD, KEEP, m, a);
> +  __pld_range (PST, KEEP, m, a);
> +  __pld_range (PLD, STRM, m, a);
> +  __pld_range (PST, STRM, m, a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> new file mode 100644
> index 00000000000..6fe71aa9922
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a -O2" } */
> +
> +#include <arm_acle.h>
> +
> +/* Access kind specifiers.  */
> +#define PLD 0
> +#define PST 1
> +/* Retention policies.  */
> +#define KEEP 0
> +#define STRM 1
> +
> +int test (void *a, uint64_t m) {
> +  __pld_range (2, KEEP, m, a);                        /* { dg-error 
> "argument 1 must be a
> constant immediate in range \\\[0,1\\\]" } */
> +  __pld_range (-1, KEEP, m, a);                       /* { dg-error 
> "argument 1 must be a
> constant immediate in range \\\[0,1\\\]" } */
> +  __pld_range (PLD, 2, m, a);                         /* { dg-error 
> "argument 2 must be a
> constant immediate in range \\\[0,1\\\]" } */
> +  __pld_range (PLD, -1, m, a);                        /* { dg-error 
> "argument 2 must be a
> constant immediate in range \\\[0,1\\\]" } */
> +
> +  __pldx_range (2, KEEP, 0, 1, 0, 0, a);              /* { dg-error 
> "argument 1 must
> be a constant immediate in range \\\[0,1\\\]" } */
> +  __pldx_range (PLD, 2, 0, 1, 0, 0, a);               /* { dg-error 
> "argument 2 must
> be a constant immediate in range \\\[0,1\\\]" } */
> +  __pldx_range (-1, KEEP, 0, 1, 0, 0, a);             /* { dg-error 
> "argument 1 must
> be a constant immediate in range \\\[0,1\\\]" } */
> +  __pldx_range (PLD, -1, 0, 1, 0, 0, a);              /* { dg-error 
> "argument 2 must
> be a constant immediate in range \\\[0,1\\\]" } */
> +
> +  __pldx_range (PLD, KEEP, -(1<<21) - 1, 1, 0, 0, a); /* { dg-error 
> "argument 3
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> +  __pldx_range (PLD, KEEP, (1<<21), 1, 0, 0, a);      /* { dg-error 
> "argument 3
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> +  __pldx_range (PLD, KEEP, 0, 0, 0, 0, a);            /* { dg-error 
> "argument 4 must
> be a constant immediate in range \\\[1,65536\\\]" } */
> +  __pldx_range (PLD, KEEP, 0, (1<<16) + 1, 0, 0, a);  /* { dg-error 
> "argument 4
> must be a constant immediate in range \\\[1,65536\\\]" } */
> +  __pldx_range (PLD, KEEP, 0, 1, -(1<<21)-1, 0, a);   /* { dg-error 
> "argument 5
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> +  __pldx_range (PLD, KEEP, 0, 1, (1<<21), 0, a);      /* { dg-error 
> "argument 5
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> +  __pldx_range (PLD, KEEP, 0, 1, 0, -1, a);     /* { dg-error "argument 6 
> must be
> a constant immediate in range \\\[0,9223372036854775806\\\]" } */
> +}
> --
> 2.34.1

Reply via email to