claudiu.zissulescu-iancule...@oracle.com writes: > [...] > /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. Here we tell the rest of the > compiler that we automatically ignore the top byte of our pointers, which > - allows using -fsanitize=hwaddress. */ > + allows using -fsanitize=hwaddress. In case of -fsanitize=memtag, we > + additionally ensure that target supports MEMTAG insns. */ > bool > aarch64_can_tag_addresses () > { > + if (memtag_sanitize_p ()) > + return !TARGET_ILP32 && TARGET_MEMTAG; > return !TARGET_ILP32; > } > > +/* Implement TARGET_MEMTAG_TAG_BITSIZE. */ > +unsigned char > +aarch64_memtag_tag_bitsize () > +{ > + if (memtag_sanitize_p ()) > + return AARCH64_MEMTAG_TAG_BITSIZE; > + return default_memtag_tag_bitsize (); > +} > + > +/* Implement TARGET_MEMTAG_GRANULE_SIZE. */ > +unsigned char > +aarch64_memtag_granule_size () > +{ > + if (memtag_sanitize_p ()) > + return AARCH64_MEMTAG_GRANULE_SIZE; > + return default_memtag_granule_size (); > +} > + > +/* Implement TARGET_MEMTAG_INSERT_RANDOM_TAG. */ > +rtx > +aarch64_memtag_insert_random_tag (rtx untagged, rtx target) > +{ > + if (memtag_sanitize_p ()) > + { > + insn_code icode = CODE_FOR_gmi; > + expand_operand ops_gmi[3]; > + rtx tmp = gen_reg_rtx (Pmode); > + create_output_operand (&ops_gmi[0], tmp, Pmode); > + create_input_operand (&ops_gmi[1], untagged, Pmode); > + create_integer_operand (&ops_gmi[2], 0); > + expand_insn (icode, 3, ops_gmi); > + > + icode = CODE_FOR_irg; > + expand_operand ops_irg[3]; > + create_output_operand (&ops_irg[0], target, Pmode); > + create_input_operand (&ops_irg[1], untagged, Pmode); > + create_input_operand (&ops_irg[2], ops_gmi[0].value, Pmode); > + expand_insn (icode, 3, ops_irg); > + return ops_irg[0].value; > + } > + else > + return default_memtag_insert_random_tag (untagged, target); > +} > + > +/* Implement TARGET_MEMTAG_ADD_TAG. 
*/ > +rtx > +aarch64_memtag_add_tag (rtx base, poly_int64 offset, uint8_t tag_offset) > +{ > + rtx target = NULL; > + poly_int64 addr_offset = offset; > + rtx offset_rtx = gen_int_mode (addr_offset, DImode);
This should only be done for the non-default path. Same below. > + > + if (!memtag_sanitize_p () || tag_offset == 0) > + return default_memtag_add_tag (base, offset, tag_offset); It would be good to use a consistent style about whether to exit early for !memtag_sanitize_p or use: if (memtag_sanitize_p ()) ... else ...default...; The former seems to be the general style used in GCC. Same below. > [...] > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index bade8af7997..472d4e07385 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -8563,46 +8563,49 @@ (define_insn "aarch64_rndrrs" > ;; Memory Tagging Extension (MTE) instructions. > > (define_insn "irg" > - [(set (match_operand:DI 0 "register_operand" "=rk") > + [(set (match_operand:DI 0 "register_operand") > (ior:DI > - (and:DI (match_operand:DI 1 "register_operand" "rk") > + (and:DI (match_operand:DI 1 "register_operand") > (const_int MEMTAG_TAG_MASK)) ;; 0xf0ff... > - (ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")] > + (ashift:DI (unspec:QI [(match_operand:DI 2 "aarch64_reg_or_zero")] > UNSPEC_GEN_TAG_RND) > (const_int 56))))] > "TARGET_MEMTAG" > - "irg\\t%0, %1, %2" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0, 1, 2 ; attrs: type ] > + [ rk , rk, r ; memtag ] irg\\t%0, %1, %2 > + [ rk , rk, Z ; memtag ] irg\\t%0, %1 > + } > ) > > (define_insn "gmi" > - [(set (match_operand:DI 0 "register_operand" "=r") > - (ior:DI (ashift:DI > - (const_int 1) > - (and:QI (lshiftrt:DI > - (match_operand:DI 1 "register_operand" "rk") > - (const_int 56)) (const_int 15))) > - (match_operand:DI 2 "register_operand" "r")))] > + [(set (match_operand:DI 0 "register_operand") > + (ior:DI > + (unspec:DI [(match_operand:DI 1 "register_operand")] > + UNSPEC_GEN_TAG) UNSPEC_GEN_TAG is for the ADDG case and takes two operands (previously: the original tag and the offset, now: a pointer with the original tag and the offset). 
I suppose if we don't use UNSPEC_GEN_TAG here, we won't match and use GMI when operand 2 is zero. Is that why you changed this pattern too? If so, I think we should make it: (unspec:DI [(match_operand:DI 1 "register_operand") (const_int 0)] UNSPEC_GEN_TAG) instead. > + (match_operand:DI 2 "aarch64_reg_or_zero")))] > "TARGET_MEMTAG" > - "gmi\\t%0, %1, %2" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0, 1, 2 ; attrs: type ] > + [ r , rk, r ; memtag ] gmi\\t%0, %1, %2 > + [ r , rk, Z ; memtag ] gmi\\t%0, %1, xzr > + } There's no need to separate these alternatives out. We can use rZ as the constraint and %x2 as the output operand. > ) > > (define_insn "addg" > - [(set (match_operand:DI 0 "register_operand" "=rk") > + [(set (match_operand:DI 0 "register_operand") > (ior:DI > - (and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") > - (match_operand:DI 2 "aarch64_granule16_uimm6" "i")) > - (const_int -1080863910568919041)) ;; 0xf0ff... > + (and:DI (plus:DI (match_operand:DI 1 "register_operand") > + (match_operand:DI 2 "aarch64_granule16_imm6")) > + (const_int MEMTAG_TAG_MASK)) > (ashift:DI > - (unspec:QI > - [(and:QI (lshiftrt:DI (match_dup 1) (const_int 56)) (const_int 15)) > - (match_operand:QI 3 "aarch64_memtag_tag_offset" "i")] > - UNSPEC_GEN_TAG) > + (unspec:DI [(match_dup 1) > + (match_operand:QI 3 "aarch64_memtag_tag_offset")] > + UNSPEC_GEN_TAG) > (const_int 56))))] > "TARGET_MEMTAG" > - "addg\\t%0, %1, #%2, #%3" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ] > + [ rk , rk , Uag , Utg ; memtag ] addg\t%0, %1, #%2, #%3 > + [ rk , rk , Ung , Utg ; memtag ] subg\t%0, %1, #%n2, #%3 > + } > ) > > (define_insn "subp" > @@ -8636,17 +8639,40 @@ (define_insn "ldg" > ;; STG doesn't align the address but aborts with alignment fault > ;; when the address is not 16-byte aligned. 
> (define_insn "stg" > - [(set (mem:QI (unspec:DI > - [(plus:DI (match_operand:DI 1 "register_operand" "rk") > - (match_operand:DI 2 "aarch64_granule16_simm9" "i"))] > - UNSPEC_TAG_SPACE)) > - (and:QI (lshiftrt:DI (match_operand:DI 0 "register_operand" "rk") > - (const_int 56)) (const_int 15)))] > - "TARGET_MEMTAG" > - "stg\\t%0, [%1, #%2]" > + [(set (match_operand:TI 0 "aarch64_granule16_memory_operand" "=Umg") > + (unspec:TI > + [(match_operand:TI 1 "aarch64_granule16_memory_operand" "Umg") > + (match_operand:DI 2 "register_operand" "rk")] > + UNSPEC_TAG_SPACE))] > + "TARGET_MEMTAG && aarch64_check_memtag_ops (operands[0], operands[1])" > + "stg\\t%2, %0" > + [(set_attr "type" "memtag")] > +) In the previous reviews, I'd suggested we use things like: (set (match_operand:TI 0 "aarch64_granule_memory_operand" "+Umg") (unspec_volatile:TI [(match_dup 0) (match_operand:DI 1 "register_operand" "rk")] UNSPECV...)) for the STG patterns. Does that not work? I remember there was some follow-up discussion about the post-increment case, but I thought the plan had been to use a separate pattern for those. (Sorry if I'm misremembering, been a while since the last series.) The problem with the pattern in the patch is that operands 0 and 1's predicates and constraints treat the operands as entirely separate. aarch64_check_memtag_ops would be applied during insn recognition but it would have no effect on (for example) register allocation. That said... I wonder if it would work to drop the aarch64_check_memtag_ops condition and instead use something like: return (GET_RTX_CLASS (GET_CODE (XEXP (operands[1], 0))) == RTX_AUTOINC ? "stg\\t%2, %1" : "stg\\t%2, %0"); We know that operands 0 and 1 started out as the same address value and it would be invalid for any later pass to change that value (rather than the way that the value is computed). 
I don't remember seeing that RTX_AUTOINC pattern used before, and it feels a bit underhand, but I can't immediately think of a reason why it's wrong in principle. The unspec must be unspec_volatile though, for the reason in the previous review: The unspec_volatile should ensure that nothing tries to remove the store as dead (which would especially be a problem when clearing tags). ISTR there was an issue about whether the unspec_volatile would prevent autoinc. If so, we should teach autoinc not to skip these particular instructions (using a target hook if necessary, as previously mentioned). > + > +;; ST2G updates allocation tags for two memory granules (i.e. 32 bytes) at > +;; once, without zero initialization. > +(define_insn "st2g" > + [(set (match_operand:OI 0 "aarch64_granule16_memory_operand" "=Umg") > + (unspec:OI > + [(match_operand:OI 1 "aarch64_granule16_memory_operand" "Umg") > + (match_operand:DI 2 "register_operand" "rk")] > + UNSPEC_TAG_SPACE))] > + "TARGET_MEMTAG && aarch64_check_memtag_ops (operands[0], operands[1])" > + "st2g\\t%2, %0" > [(set_attr "type" "memtag")] > ) > > +(define_expand "tag_memory" > + [(match_operand:DI 0 "general_operand" "") > + (match_operand:DI 1 "nonmemory_operand" "") > + (match_operand:DI 2 "nonmemory_operand" "")] > + "" > + " > +{ > + aarch64_expand_tag_memory (operands[0], operands[1], operands[2]); > + DONE; > +}") > + > ;; Load/Store 64-bit (LS64) instructions. > (define_insn "ld64b" > [(set (match_operand:V8DI 0 "register_operand" "=r") > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index dc1925dfb6c..ac64b4fb228 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -352,6 +352,12 @@ (define_memory_constraint "Ump" > (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, > 0), > true, ADDR_QUERY_LDP_STP)"))) > > +(define_memory_constraint "Umg" > + "@internal > + A memory address for MTE load/store tag operation." 
> + (and (match_code "mem") > + (match_test "aarch64_granule16_memory_address_p (op)"))) > + > ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP > ;; as a vector-concat. The address mode uses the same constraints as if it > ;; were for a single value. > @@ -606,6 +612,26 @@ (define_address_constraint "Dp" > An address valid for a prefetch instruction." > (match_test "aarch64_address_valid_for_prefetch_p (op, true)")) > > +(define_constraint "Uag" > + "@internal > + A constant that can be used as address offset for an ADDG operation." > + (and (match_code "const_int") > + (match_test "IN_RANGE (ival, 0, 1008) > + && !(ival & 0xf)"))) > + > +(define_constraint "Ung" > + "@internal > + A constant that can be used as address offset for an SUBG operation (once > + negated)." > + (and (match_code "const_int") > + (match_test "IN_RANGE (ival, -1024, -1) > + && !(ival & 0xf)"))) > + > +(define_constraint "Utg" > + "@internal > + A constant that can be used as tag offset for an ADDG/SUBG operation." > + (match_operand 0 "aarch64_memtag_tag_offset")) There's no need for this constraint, since the ADDG operand uses aarch64_memtag_tag_offset as the predicate. The constraints for operand 3 can just be blank. Thanks, Richard > + > (define_constraint "vgb" > "@internal > A constraint that matches an immediate offset valid for SVE LD1B > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index 32056daf329..0a68b3638bc 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -1061,13 +1061,20 @@ (define_predicate > "aarch64_bytes_per_sve_vector_operand" > (match_test "known_eq (wi::to_poly_wide (op, mode), > BYTES_PER_SVE_VECTOR)"))) > > +;; The uimm4 field is a 4-bit field that only accepts immediates in the > +;; range 0..15. 
> (define_predicate "aarch64_memtag_tag_offset" > (and (match_code "const_int") > - (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) > + (match_test "UINTVAL (op) <= 15"))) > + > +(define_predicate "aarch64_granule16_memory_operand" > + (and (match_test "TARGET_MEMTAG") > + (match_code "mem") > + (match_test "aarch64_granule16_memory_address_p (op)"))) > > -(define_predicate "aarch64_granule16_uimm6" > +(define_predicate "aarch64_granule16_imm6" > (and (match_code "const_int") > - (match_test "IN_RANGE (INTVAL (op), 0, 1008) > + (match_test "IN_RANGE (INTVAL (op), -1024, 1008) > && !(INTVAL (op) & 0xf)"))) > > (define_predicate "aarch64_granule16_simm9" > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index d8f11201361..c073ad3f303 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -18176,8 +18176,10 @@ for a list of supported options. > The option cannot be combined with @option{-fsanitize=thread} or > @option{-fsanitize=hwaddress}. Note that the only targets > @option{-fsanitize=hwaddress} is currently supported on are x86-64 > -(only with @code{-mlam=u48} or @code{-mlam=u57} options) and AArch64, > -in both cases only in ABIs with 64-bit pointers. > +(only with @code{-mlam=u48} or @code{-mlam=u57} options) and AArch64, in both > +cases only in ABIs with 64-bit pointers. Similarly, > +@option{-fsanitize=memtag-stack} is currently only supported on AArch64 ABIs > +with 64-bit pointers. > > When compiling with @option{-fsanitize=address}, you should also > use @option{-g} to produce more meaningful output. 
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > b/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > index f8368690032..e94a2220fe3 100644 > --- a/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > @@ -54,9 +54,9 @@ test_memtag_6 (void *p) > __arm_mte_set_tag (p); > } > > -/* { dg-final { scan-assembler-times {irg\tx..?, x..?, x..?\n} 1 } } */ > +/* { dg-final { scan-assembler-times {irg\tx..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {gmi\tx..?, x..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {subp\tx..?, x..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {addg\tx..?, x..?, #0, #1\n} 1 } } */ > /* { dg-final { scan-assembler-times {ldg\tx..?, \[x..?, #0\]\n} 1 } } */ > -/* { dg-final { scan-assembler-times {stg\tx..?, \[x..?, #0\]\n} 1 } } */ > \ No newline at end of file > +/* { dg-final { scan-assembler-times {stg\tx..?, \[x..?\]\n} 1 } } */