claudiu.zissulescu-iancule...@oracle.com writes: > [...] > /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. Here we tell the rest of the > compiler that we automatically ignore the top byte of our pointers, which > - allows using -fsanitize=hwaddress. */ > + allows using -fsanitize=hwaddress. In case of -fsanitize=memtag, we > + additionally ensure that target supports MEMTAG insns. */ > bool > aarch64_can_tag_addresses () > { > + if (memtag_sanitize_p ()) > + return !TARGET_ILP32 && TARGET_MEMTAG; > return !TARGET_ILP32; > } > > +/* Implement TARGET_MEMTAG_TAG_BITSIZE. */ > +unsigned char > +aarch64_memtag_tag_bitsize () > +{ > + if (memtag_sanitize_p ()) > + return AARCH64_MEMTAG_TAG_BITSIZE; > + return default_memtag_tag_bitsize (); > +} > + > +/* Implement TARGET_MEMTAG_GRANULE_SIZE. */ > +unsigned char > +aarch64_memtag_granule_size () > +{ > + if (memtag_sanitize_p ()) > + return AARCH64_MEMTAG_GRANULE_SIZE; > + return default_memtag_granule_size (); > +} > + > +/* Implement TARGET_MEMTAG_INSERT_RANDOM_TAG. */ > +rtx > +aarch64_memtag_insert_random_tag (rtx untagged, rtx target) > +{ > + if (memtag_sanitize_p ()) > + { > + insn_code icode = CODE_FOR_gmi; > + expand_operand ops_gmi[3]; > + rtx tmp = gen_reg_rtx (Pmode); > + create_output_operand (&ops_gmi[0], tmp, Pmode); > + create_input_operand (&ops_gmi[1], untagged, Pmode); > + create_integer_operand (&ops_gmi[2], 0); > + expand_insn (icode, 3, ops_gmi); > + > + icode = CODE_FOR_irg; > + expand_operand ops_irg[3]; > + create_output_operand (&ops_irg[0], target, Pmode); > + create_input_operand (&ops_irg[1], untagged, Pmode); > + create_input_operand (&ops_irg[2], ops_gmi[0].value, Pmode); > + expand_insn (icode, 3, ops_irg); > + return ops_irg[0].value; > + } > + else > + return default_memtag_insert_random_tag (untagged, target); > +} > + > +/* Implement TARGET_MEMTAG_ADD_TAG. 
*/ > +rtx > +aarch64_memtag_add_tag (rtx base, poly_int64 offset, uint8_t tag_offset) > +{ > + rtx target = NULL; > + poly_int64 addr_offset = offset; > + rtx offset_rtx = gen_int_mode (addr_offset, DImode);
This should only be done for the non-default path. Same below. > + > + if (!memtag_sanitize_p () || tag_offset == 0) > + return default_memtag_add_tag (base, offset, tag_offset); It would be good to use a consistent style about whether to exit early for !memtag_sanitize_p or use: if (memtag_sanitize_p ()) ... else ...default...; The former seems to be the general style used in GCC. Same below. > [...] > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index bade8af7997..472d4e07385 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -8563,46 +8563,49 @@ (define_insn "aarch64_rndrrs" > ;; Memory Tagging Extension (MTE) instructions. > > (define_insn "irg" > - [(set (match_operand:DI 0 "register_operand" "=rk") > + [(set (match_operand:DI 0 "register_operand") > (ior:DI > - (and:DI (match_operand:DI 1 "register_operand" "rk") > + (and:DI (match_operand:DI 1 "register_operand") > (const_int MEMTAG_TAG_MASK)) ;; 0xf0ff... > - (ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")] > + (ashift:DI (unspec:QI [(match_operand:DI 2 "aarch64_reg_or_zero")] > UNSPEC_GEN_TAG_RND) > (const_int 56))))] > "TARGET_MEMTAG" > - "irg\\t%0, %1, %2" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0, 1, 2 ; attrs: type ] > + [ rk , rk, r ; memtag ] irg\\t%0, %1, %2 > + [ rk , rk, Z ; memtag ] irg\\t%0, %1 > + } > ) > > (define_insn "gmi" > - [(set (match_operand:DI 0 "register_operand" "=r") > - (ior:DI (ashift:DI > - (const_int 1) > - (and:QI (lshiftrt:DI > - (match_operand:DI 1 "register_operand" "rk") > - (const_int 56)) (const_int 15))) > - (match_operand:DI 2 "register_operand" "r")))] > + [(set (match_operand:DI 0 "register_operand") > + (ior:DI > + (unspec:DI [(match_operand:DI 1 "register_operand")] > + UNSPEC_GEN_TAG) UNSPEC_GEN_TAG is for the ADDG case and takes two operands (previously: the original tag and the offset, now: a pointer with the original tag and the offset). 
I suppose if we don't use UNSPEC_GEN_TAG here, we won't match and use GMI when operand 2 is zero. Is that why you changed this pattern too? If so, I think we should make it: (unspec:DI [(match_operand:DI 1 "register_operand") (const_int 0)] UNSPEC_GEN_TAG) instead. > + (match_operand:DI 2 "aarch64_reg_or_zero")))] > "TARGET_MEMTAG" > - "gmi\\t%0, %1, %2" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0, 1, 2 ; attrs: type ] > + [ r , rk, r ; memtag ] gmi\\t%0, %1, %2 > + [ r , rk, Z ; memtag ] gmi\\t%0, %1, xzr > + } There's no need to separate these alternatives out. We can use rZ as the constraint and %x2 as the output operand. > ) > > (define_insn "addg" > - [(set (match_operand:DI 0 "register_operand" "=rk") > + [(set (match_operand:DI 0 "register_operand") > (ior:DI > - (and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") > - (match_operand:DI 2 "aarch64_granule16_uimm6" "i")) > - (const_int -1080863910568919041)) ;; 0xf0ff... > + (and:DI (plus:DI (match_operand:DI 1 "register_operand") > + (match_operand:DI 2 "aarch64_granule16_imm6")) > + (const_int MEMTAG_TAG_MASK)) > (ashift:DI > - (unspec:QI > - [(and:QI (lshiftrt:DI (match_dup 1) (const_int 56)) (const_int 15)) > - (match_operand:QI 3 "aarch64_memtag_tag_offset" "i")] > - UNSPEC_GEN_TAG) > + (unspec:DI [(match_dup 1) > + (match_operand:QI 3 "aarch64_memtag_tag_offset")] > + UNSPEC_GEN_TAG) > (const_int 56))))] > "TARGET_MEMTAG" > - "addg\\t%0, %1, #%2, #%3" > - [(set_attr "type" "memtag")] > + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ] > + [ rk , rk , Uag , Utg ; memtag ] addg\t%0, %1, #%2, #%3 > + [ rk , rk , Ung , Utg ; memtag ] subg\t%0, %1, #%n2, #%3 > + } > ) > > (define_insn "subp" > @@ -8636,17 +8639,40 @@ (define_insn "ldg" > ;; STG doesn't align the address but aborts with alignment fault > ;; when the address is not 16-byte aligned. 
> (define_insn "stg" > - [(set (mem:QI (unspec:DI > - [(plus:DI (match_operand:DI 1 "register_operand" "rk") > - (match_operand:DI 2 "aarch64_granule16_simm9" "i"))] > - UNSPEC_TAG_SPACE)) > - (and:QI (lshiftrt:DI (match_operand:DI 0 "register_operand" "rk") > - (const_int 56)) (const_int 15)))] > - "TARGET_MEMTAG" > - "stg\\t%0, [%1, #%2]" > + [(set (match_operand:TI 0 "aarch64_granule16_memory_operand" "=Umg") > + (unspec:TI > + [(match_operand:TI 1 "aarch64_granule16_memory_operand" "Umg") > + (match_operand:DI 2 "register_operand" "rk")] > + UNSPEC_TAG_SPACE))] > + "TARGET_MEMTAG && aarch64_check_memtag_ops (operands[0], operands[1])" > + "stg\\t%2, %0" > + [(set_attr "type" "memtag")] > +) In the previous reviews, I'd suggested we use things like: (set (match_operand:TI 0 "aarch64_granule_memory_operand" "+Umg") (unspec_volatile:TI [(match_dup 0) (match_operand:DI 1 "register_operand" "rk")] UNSPECV...)) for the STG patterns. Does that not work? I remember there was some follow-up discussion about the post-increment case, but I thought the plan had been to use a separate pattern for those. (Sorry if I'm misremembering, been a while since the last series.) The problem with the pattern in the patch is that operands 0 and 1's predicates and constraints treat the operands as entirely separate. aarch64_check_memtag_ops would be applied during insn recognition but it would have no effect on (for example) register allocation. That said... I wonder if it would work to drop the aarch64_check_memtag_ops condition and instead use something like: return (GET_RTX_CLASS (GET_CODE (XEXP (operands[1], 0))) == RTX_AUTOINC ? "stg\\t%2, %1" : "stg\\t%2, %0"); We know that operands 0 and 1 started out as the same address value and it would be invalid for any later pass to change that value (rather than the way that the value is computed). 
I don't remember seeing that RTX_AUTOINC pattern used before, and it feels a bit underhand, but I can't immediately think of a reason why it's wrong in principle. The unspec must be unspec_volatile though, for the reason in the previous review: The unspec_volatile should ensure that nothing tries to remove the store as dead (which would especially be a problem when clearing tags). ISTR there was an issue about whether the unspec_volatile would prevent autoinc. If so, we should teach autoinc not to skip these particular instructions (using a target hook if necessary, as previously mentioned). > + > +;; ST2G updates allocation tags for two memory granules (i.e. 32 bytes) at > +;; once, without zero initialization. > +(define_insn "st2g" > + [(set (match_operand:OI 0 "aarch64_granule16_memory_operand" "=Umg") > + (unspec:OI > + [(match_operand:OI 1 "aarch64_granule16_memory_operand" "Umg") > + (match_operand:DI 2 "register_operand" "rk")] > + UNSPEC_TAG_SPACE))] > + "TARGET_MEMTAG && aarch64_check_memtag_ops (operands[0], operands[1])" > + "st2g\\t%2, %0" > [(set_attr "type" "memtag")] > ) > > +(define_expand "tag_memory" > + [(match_operand:DI 0 "general_operand" "") > + (match_operand:DI 1 "nonmemory_operand" "") > + (match_operand:DI 2 "nonmemory_operand" "")] > + "" > + " > +{ > + aarch64_expand_tag_memory (operands[0], operands[1], operands[2]); > + DONE; > +}") > + > ;; Load/Store 64-bit (LS64) instructions. > (define_insn "ld64b" > [(set (match_operand:V8DI 0 "register_operand" "=r") > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index dc1925dfb6c..ac64b4fb228 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -352,6 +352,12 @@ (define_memory_constraint "Ump" > (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, > 0), > true, ADDR_QUERY_LDP_STP)"))) > > +(define_memory_constraint "Umg" > + "@internal > + A memory address for MTE load/store tag operation." 
> + (and (match_code "mem") > + (match_test "aarch64_granule16_memory_address_p (op)"))) > + > ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP > ;; as a vector-concat. The address mode uses the same constraints as if it > ;; were for a single value. > @@ -606,6 +612,26 @@ (define_address_constraint "Dp" > An address valid for a prefetch instruction." > (match_test "aarch64_address_valid_for_prefetch_p (op, true)")) > > +(define_constraint "Uag" > + "@internal > + A constant that can be used as address offset for an ADDG operation." > + (and (match_code "const_int") > + (match_test "IN_RANGE (ival, 0, 1008) > + && !(ival & 0xf)"))) > + > +(define_constraint "Ung" > + "@internal > + A constant that can be used as address offset for an SUBG operation (once > + negated)." > + (and (match_code "const_int") > + (match_test "IN_RANGE (ival, -1024, -1) > + && !(ival & 0xf)"))) > + > +(define_constraint "Utg" > + "@internal > + A constant that can be used as tag offset for an ADDG/SUBG operation." > + (match_operand 0 "aarch64_memtag_tag_offset")) There's no need for this constraint, since the ADDG operand uses aarch64_memtag_tag_offset as the predicate. The constraints for operand 3 can just be blank. Thanks, Richard > + > (define_constraint "vgb" > "@internal > A constraint that matches an immediate offset valid for SVE LD1B > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index 32056daf329..0a68b3638bc 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -1061,13 +1061,20 @@ (define_predicate > "aarch64_bytes_per_sve_vector_operand" > (match_test "known_eq (wi::to_poly_wide (op, mode), > BYTES_PER_SVE_VECTOR)"))) > > +;; The uimm4 field is a 4-bit field that only accepts immediates in the > +;; range 0..15. 
> (define_predicate "aarch64_memtag_tag_offset" > (and (match_code "const_int") > - (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) > + (match_test "UINTVAL (op) <= 15"))) > + > +(define_predicate "aarch64_granule16_memory_operand" > + (and (match_test "TARGET_MEMTAG") > + (match_code "mem") > + (match_test "aarch64_granule16_memory_address_p (op)"))) > > -(define_predicate "aarch64_granule16_uimm6" > +(define_predicate "aarch64_granule16_imm6" > (and (match_code "const_int") > - (match_test "IN_RANGE (INTVAL (op), 0, 1008) > + (match_test "IN_RANGE (INTVAL (op), -1024, 1008) > && !(INTVAL (op) & 0xf)"))) > > (define_predicate "aarch64_granule16_simm9" > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index d8f11201361..c073ad3f303 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -18176,8 +18176,10 @@ for a list of supported options. > The option cannot be combined with @option{-fsanitize=thread} or > @option{-fsanitize=hwaddress}. Note that the only targets > @option{-fsanitize=hwaddress} is currently supported on are x86-64 > -(only with @code{-mlam=u48} or @code{-mlam=u57} options) and AArch64, > -in both cases only in ABIs with 64-bit pointers. > +(only with @code{-mlam=u48} or @code{-mlam=u57} options) and AArch64, in both > +cases only in ABIs with 64-bit pointers. Similarly, > +@option{-fsanitize=memtag-stack} is currently only supported on AArch64 ABIs > +with 64-bit pointers. > > When compiling with @option{-fsanitize=address}, you should also > use @option{-g} to produce more meaningful output. 
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > b/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > index f8368690032..e94a2220fe3 100644 > --- a/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/acle/memtag_1.c > @@ -54,9 +54,9 @@ test_memtag_6 (void *p) > __arm_mte_set_tag (p); > } > > -/* { dg-final { scan-assembler-times {irg\tx..?, x..?, x..?\n} 1 } } */ > +/* { dg-final { scan-assembler-times {irg\tx..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {gmi\tx..?, x..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {subp\tx..?, x..?, x..?\n} 1 } } */ > /* { dg-final { scan-assembler-times {addg\tx..?, x..?, #0, #1\n} 1 } } */ > /* { dg-final { scan-assembler-times {ldg\tx..?, \[x..?, #0\]\n} 1 } } */ > -/* { dg-final { scan-assembler-times {stg\tx..?, \[x..?, #0\]\n} 1 } } */ > \ No newline at end of file > +/* { dg-final { scan-assembler-times {stg\tx..?, \[x..?\]\n} 1 } } */