> -----Original Message-----
> From: Alfie Richards <[email protected]>
> Sent: 09 February 2026 10:10
> To: [email protected]
> Cc: Richard Earnshaw <[email protected]>; Tamar Christina
> <[email protected]>; [email protected]; Alice Carlotti
> <[email protected]>; Alex Coplan <[email protected]>; Wilco
> Dijkstra <[email protected]>; [email protected]; Alfie
> Richards <[email protected]>
> Subject: [PATCH] aarch64: Add support for range prefetch intrinsic.
>
> Hi all,
>
> This patch adds support for the aarch64 range prefetch intrinsic.
>
> Bootstrapped and reg tested for AArch64-linux-gnu.
>
> Okay for master (in stage 1 maybe?)
>
> KR,
> Alfie
>
> -- >8 --
>
> Also updates require_const_argument to always return a value in range.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
> Add AARCH64_PREFETCH_PLD_RANGE and
> AARCH64_PREFETCH_PLDX_RANGE.
> (aarch64_init_prefetch_builtins): Add initialization of
> __pld_range and __pldx_range.
> (require_const_argument): Update to return the minval if value
> is out of range.
> (aarch64_expand_prefetch_range_builtin): New function.
> (aarch64_general_expand_builtin): Add support for
> AARCH64_PREFETCH_PLD_RANGE and
> AARCH64_PREFETCH_PLDX_RANGE.
> * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add
> __ARM_PREFETCH_RANGE macro.
> * config/aarch64/aarch64.md (unspec): Add UNSPEC_PLDX_RANGE
> and
> UNSPEC_PLD_RANGE
> (aarch64_rprfm): New instruction.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/acle/rprfm.c: New test.
> * gcc.target/aarch64/acle/rprfm_error.c: New test.
> ---
> gcc/config/aarch64/aarch64-builtins.cc | 128 +++++++++++++++++-
> gcc/config/aarch64/aarch64-c.cc | 1 +
> gcc/config/aarch64/aarch64.md | 14 ++
> gcc/testsuite/gcc.target/aarch64/acle/rprfm.c | 107 +++++++++++++++
> .../gcc.target/aarch64/acle/rprfm_error.c | 31 +++++
> 5 files changed, 276 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc
> b/gcc/config/aarch64/aarch64-builtins.cc
> index dd74cf06ef2..39658ffab11 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -894,6 +894,8 @@ enum aarch64_builtins
> AARCH64_WSR128,
> AARCH64_PREFETCH_PLD,
> AARCH64_PREFETCH_PLDX,
> + AARCH64_PREFETCH_PLD_RANGE,
> + AARCH64_PREFETCH_PLDX_RANGE,
> AARCH64_PREFETCH_PLI,
> AARCH64_PREFETCH_PLIX,
> AARCH64_PREFETCH_PLDIR,
> @@ -2249,6 +2251,18 @@ aarch64_init_prefetch_builtins (void)
>
> ftype = build_function_type_list (void_type_node, cv_argtype, NULL_TREE);
> AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldir", PLDIR);
> +
> + ftype = build_function_type_list (void_type_node, unsigned_type_node,
> + unsigned_type_node, integer_type_node,
> + unsigned_type_node, integer_type_node,
> + size_type_node, cv_argtype, NULL);
> + AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pldx_range", PLDX_RANGE);
> +
> + ftype = build_function_type_list (void_type_node, unsigned_type_node,
> + unsigned_type_node,
> + long_long_unsigned_type_node,
> cv_argtype,
> + NULL);
> + AARCH64_INIT_PREFETCH_BUILTINS_DECL ("__pld_range", PLD_RANGE);
> }
>
> /* Initialize the memory tagging extension (MTE) builtins. */
> @@ -3650,9 +3664,13 @@ require_const_argument (tree exp, unsigned int
> argno, HOST_WIDE_INT minval,
> auto argval = wi::to_widest (arg);
>
> if (argval < minval || argval > maxval)
> - error_at (EXPR_LOCATION (exp),
> - "argument %d must be a constant immediate "
> - "in range [%wd,%wd]", argno + 1, minval, maxval);
> + {
> + error_at (EXPR_LOCATION (exp),
> + "argument %d must be a constant immediate "
> + "in range [%wd,%wd]",
> + argno + 1, minval, maxval);
> + return minval;
> + }
>
Heh, I thought error_at was NORETURN, but I guess it's not.
Maybe -1 is a better return value, since the range is invalid and
the value shouldn't be used.
> HOST_WIDE_INT retval = argval.to_shwi ();
> return retval;
> @@ -3723,8 +3741,104 @@ aarch64_expand_prefetch_builtin (tree exp, int
> fcode)
> maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
> }
>
> -/* Expand an expression EXP that calls a MEMTAG built-in FCODE
> - with result going to TARGET. */
> +/* Expand a prefetch range builtin EXP. */
> +void
> +aarch64_expand_prefetch_range_builtin (tree exp, int fcode)
> +{
> + char prfop[11];
> + class expand_operand ops[3];
> +
> + static const char *kind_s[] = {"PLD", "PST"};
> + static const char *rettn_s[] = {"KEEP", "STRM"};
> +
> + int argno = 0;
> +
> + int kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE
> (kind_s));
> + int rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE
> (rettn_s));
> +
> + rtx metadata = NULL_RTX;
> +
> + switch (fcode)
> + {
> + case AARCH64_PREFETCH_PLDX_RANGE:
> + {
> + /* length must be in [-2^21,2^21). */
> + int length = require_const_argument (exp, argno++, -(1 << 21), 1 <<
> 21);
> + gcc_assert (length >= -(1 << 21) && length < (1 << 21));
> +
> + /* count must be in [1,2^16]. */
> + int count = require_const_argument (exp, argno++, 1, (1 << 16) + 1);
> + gcc_assert (count >= 1 && count <= (1 << 16));
> +
> + /* stride must be in [-2^21,2^21). */
> + int stride = require_const_argument (exp, argno++, -(1 << 21), 1 <<
> 21);
> + gcc_assert (stride >= -(1 << 21) && stride < (1 << 21));
> +
> + /* There is no requirements on reuse_distance other than to be a
> + non-negative integer. However it is meaningless for
> + values less than 2^15 or greater than 2^29. */
> + uint64_t reuse_distance = require_const_argument (exp, argno++, 0,
> + LONG_LONG_MAX);
> +
> + uint64_t length_bits = ((uint64_t) length) & ((1 << 22) - 1);
> + uint64_t count_bits = ((uint64_t) count - 1) & ((1 << 16) - 1);
> + uint64_t stride_bits = ((uint64_t) stride) & ((1 << 22) - 1);
> +
> + uint64_t reuse_distance_bits = 0;
> + /* If reuse distance > 512MiB or = 0 then use 0 to represent distance
> + unknown. */
> + if (reuse_distance != 0 && reuse_distance <= (1ULL << 29))
> + {
> + /* Find the largest n such that (2 ^ (15-n)) * 32KB >= reuse
> + distance. */
> + if (reuse_distance <= (1ULL << 15))
> + reuse_distance_bits = 15;
> + else
> + reuse_distance_bits = __builtin_clzll (reuse_distance - 1) - 34;
> +
> + /* Reuse distance is a 4 bit value. */
> + gcc_assert (reuse_distance_bits < (1 << 4));
> + }
> +
> + uint64_t metadata_val = length_bits
> + | (count_bits << 22)
> + | (stride_bits << 38)
> + | (reuse_distance_bits << 60);
> +
> + metadata = GEN_INT (metadata_val);
> + break;
> + }
> + case AARCH64_PREFETCH_PLD_RANGE:
> + {
> + tree metadata_arg = CALL_EXPR_ARG (exp, argno++);
> + metadata = copy_to_mode_reg (E_DImode, expand_normal
> (metadata_arg));
> + break;
> + }
> + default:
> + gcc_unreachable ();
> + }
> +
> + /* Any -1 id variable is to be user-supplied. Here we fill these in and
> run
> + bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI &
> + AARCH64_PLIX, never explicitly. */
> + rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX,
> Pmode,
> + EXPAND_NORMAL);
> +
> + if (seen_error ())
> + return;
> +
> + sprintf (prfop, "%s%s", kind_s[kind_id], rettn_s[rettn_id]);
> +
> + rtx const_str = rtx_alloc (CONST_STRING);
> + PUT_CODE (const_str, CONST_STRING);
> + XSTR (const_str, 0) = ggc_strdup (prfop);
> +
> + create_fixed_operand (&ops[0], const_str);
> + create_input_operand (&ops[1], metadata, E_DImode);
> + create_address_operand (&ops[2], address);
> + maybe_expand_insn (CODE_FOR_aarch64_rprfm, 3, ops);
> +}
> +
> static rtx
> aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
> {
> @@ -4578,6 +4692,10 @@ aarch64_general_expand_builtin (unsigned int
> fcode, tree exp, rtx target,
> case AARCH64_PREFETCH_PLDIR:
> aarch64_expand_pldir_builtin (exp);
> return target;
> + case AARCH64_PREFETCH_PLD_RANGE:
> + case AARCH64_PREFETCH_PLDX_RANGE:
> + aarch64_expand_prefetch_range_builtin (exp, fcode);
> + return target;
> case AARCH64_BUILTIN_CHKFEAT:
> {
> rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-
> c.cc
> index f8be998da16..58fa761a9bb 100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -310,6 +310,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
> "__ARM_FEATURE_SME2p1", pfile);
> aarch64_def_or_undef (TARGET_FAMINMAX,
> "__ARM_FEATURE_FAMINMAX", pfile);
> aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT",
> pfile);
> + builtin_define ("__ARM_PREFETCH_RANGE");
>
> // Function multi-versioning defines
> aarch64_def_or_undef (targetm.has_ifunc_p (),
> diff --git a/gcc/config/aarch64/aarch64.md
> b/gcc/config/aarch64/aarch64.md
> index 70a64a6c0ed..4c5485cba1d 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -393,6 +393,8 @@ (define_c_enum "unspec" [
> UNSPEC_SYSREG_WTI
> UNSPEC_PLDX
> UNSPEC_PLDIR
> + UNSPEC_PLDX_RANGE
> + UNSPEC_PLD_RANGE
> ;; Represents an SVE-style lane index, in which the indexing applies
> ;; within the containing 128-bit block.
> UNSPEC_SVE_LANE_SELECT
> @@ -1381,6 +1383,18 @@ (define_insn "aarch64_pldx"
> [(set_attr "type" "load_4")]
> )
>
> +(define_insn "aarch64_rprfm"
> + [(unspec [(match_operand 0 "" "")
> + (match_operand:DI 1 "aarch64_prefetch_operand" "Dp")
> + (match_operand:DI 2 "register_operand" "r")] UNSPEC_PLDX)]
> + ""
> + {
> + operands[2] = gen_rtx_MEM (DImode, operands[2]);
> + return "rprfm\\t%0, %1, %2";
> + }
> + [(set_attr "type" "load_4")]
> +)
I think operand 2 here should be
  (match_operand:DI 2 "memory_operand" "Q")
which should allow you to drop the last-minute conversion to MEM.
OK with those changes.
Thanks,
Tamar
> +
> (define_insn "trap"
> [(trap_if (const_int 1) (const_int 8))]
> ""
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> new file mode 100644
> index 00000000000..02f0f11223b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm.c
> @@ -0,0 +1,107 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a -O1 -fno-schedule-insns" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_acle.h>
> +
> +/* Access kind specifiers. */
> +#define PLD 0
> +#define PST 1
> +/* Retention policies. */
> +#define KEEP 0
> +#define STRM 1
> +
> +/* This test is a bit awkward as we need to test the constants that get
> passed
> + into x1. This may be a bit fragile. */
> +
> +/*
> +** pldx_range:
> +**...
> +** mov x1, 0
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PSTKEEP, x1, \[x0\]
> +** rprfm PLDSTRM, x1, \[x0\]
> +** rprfm PSTSTRM, x1, \[x0\]
> +** mov x1, 1
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 4194303
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 2097152
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 2097151
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 0
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 4194304
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 274873712640
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 576460752303423488
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 576460477425516544
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, -1152921504606846976
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, -2305843009213693952
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, -3458764513820540928
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 2305843009213693952
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 1152921504606846976
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** mov x1, 0
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PLDKEEP, x1, \[x0\]
> +**...
> +*/
> +int pldx_range (void *a) {
> + __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);
> + __pldx_range (PST, KEEP, 0, 1, 0, 0, a);
> + __pldx_range (PLD, STRM, 0, 1, 0, 0, a);
> + __pldx_range (PST, STRM, 0, 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, 1, 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, -1, 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, -(1<<21), 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, (1<<21)-1, 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 0, a);
> + __pldx_range (PLD, KEEP, 0, 2, 0, 0, a);
> + __pldx_range (PLD, KEEP, 0, 65536, 0, 0, a);
> + __pldx_range (PLD, KEEP, 0, 1, -(1<<21), 0, a);
> + __pldx_range (PLD, KEEP, 0, 1, (1<<21)-1, 0, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 15) - 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 15, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) - 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 16, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 16) + 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 28, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 28) + 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) - 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, 1ULL << 29, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 29) + 1, a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 30), a);
> + __pldx_range (PLD, KEEP, 0, 1, 0, (1ULL << 31), a);
> +}
> +
> +/*
> +** pld_range:
> +**...
> +** rprfm PLDKEEP, x1, \[x0\]
> +** rprfm PSTKEEP, x1, \[x0\]
> +** rprfm PLDSTRM, x1, \[x0\]
> +** rprfm PSTSTRM, x1, \[x0\]
> +**...
> +*/
> +int pld_range (void *a, uint64_t m) {
> + __pld_range (PLD, KEEP, m, a);
> + __pld_range (PST, KEEP, m, a);
> + __pld_range (PLD, STRM, m, a);
> + __pld_range (PST, STRM, m, a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> new file mode 100644
> index 00000000000..6fe71aa9922
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rprfm_error.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a -O2" } */
> +
> +#include <arm_acle.h>
> +
> +/* Access kind specifiers. */
> +#define PLD 0
> +#define PST 1
> +/* Retention policies. */
> +#define KEEP 0
> +#define STRM 1
> +
> +int test (void *a, uint64_t m) {
> + __pld_range (2, KEEP, m, a); /* { dg-error
> "argument 1 must be a
> constant immediate in range \\\[0,1\\\]" } */
> + __pld_range (-1, KEEP, m, a); /* { dg-error
> "argument 1 must be a
> constant immediate in range \\\[0,1\\\]" } */
> + __pld_range (PLD, 2, m, a); /* { dg-error
> "argument 2 must be a
> constant immediate in range \\\[0,1\\\]" } */
> + __pld_range (PLD, -1, m, a); /* { dg-error
> "argument 2 must be a
> constant immediate in range \\\[0,1\\\]" } */
> +
> + __pldx_range (2, KEEP, 0, 1, 0, 0, a); /* { dg-error
> "argument 1 must
> be a constant immediate in range \\\[0,1\\\]" } */
> + __pldx_range (PLD, 2, 0, 1, 0, 0, a); /* { dg-error
> "argument 2 must
> be a constant immediate in range \\\[0,1\\\]" } */
> + __pldx_range (-1, KEEP, 0, 1, 0, 0, a); /* { dg-error
> "argument 1 must
> be a constant immediate in range \\\[0,1\\\]" } */
> + __pldx_range (PLD, -1, 0, 1, 0, 0, a); /* { dg-error
> "argument 2 must
> be a constant immediate in range \\\[0,1\\\]" } */
> +
> + __pldx_range (PLD, KEEP, -(1<<21) - 1, 1, 0, 0, a); /* { dg-error
> "argument 3
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> + __pldx_range (PLD, KEEP, (1<<21), 1, 0, 0, a); /* { dg-error
> "argument 3
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> + __pldx_range (PLD, KEEP, 0, 0, 0, 0, a); /* { dg-error
> "argument 4 must
> be a constant immediate in range \\\[1,65536\\\]" } */
> + __pldx_range (PLD, KEEP, 0, (1<<16) + 1, 0, 0, a); /* { dg-error
> "argument 4
> must be a constant immediate in range \\\[1,65536\\\]" } */
> + __pldx_range (PLD, KEEP, 0, 1, -(1<<21)-1, 0, a); /* { dg-error
> "argument 5
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> + __pldx_range (PLD, KEEP, 0, 1, (1<<21), 0, a); /* { dg-error
> "argument 5
> must be a constant immediate in range \\\[-2097152,2097151\\\]" } */
> + __pldx_range (PLD, KEEP, 0, 1, 0, -1, a); /* { dg-error "argument 6
> must be
> a constant immediate in range \\\[0,9223372036854775806\\\]" } */
> +}
> --
> 2.34.1