pcc created this revision. pcc added reviewers: eugenis, hctim. Herald added subscribers: Sanitizers, cfe-commits, danielkiss, hiraditya, kristof.beyls. Herald added projects: clang, Sanitizers, LLVM. pcc requested review of this revision.
From a code size perspective it turns out to be better to use a callee-saved register to pass the shadow base. For non-leaf functions it avoids the need to reload the shadow base into x9 after each function call, at the cost of an additional stack slot to save the caller's x20. But with x9 there is also a stack size cost, either as a result of copying x9 to a callee-saved register across calls or by spilling it to stack, so for the non-leaf functions the change to stack usage is largely neutral. It is also code size (and stack size) neutral for many leaf functions. Although they now need to save/restore x20, this can typically be combined via LDP/STP into the x30 save/restore. In the case where the function needs callee-saved registers or stack spills we end up needing, on average, 8 more bytes of stack and 1 more instruction but given the improvements to other functions this seems like the right tradeoff. Since this is an ABI change for the outlined check functions they have been renamed. Unfortunately we cannot change the register for the v1 (non short granules) check because the runtime assumes that the shadow base register is stored in x9, so the v1 check still uses x9. With this change code size of /system/lib64/*.so in an Android build with HWASan goes from 200066976 bytes to 194085912 bytes, or a 3% decrease. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D90422 Files: clang/docs/HardwareAssistedAddressSanitizerDesign.rst compiler-rt/test/hwasan/TestCases/register-dump-read.c llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll
Index: llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll =================================================================== --- llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll +++ llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll @@ -18,12 +18,13 @@ define i8* @f2(i8* %x0, i8* %x1) { ; CHECK: f2: - ; CHECK: str x30, [sp, #-16]! + ; CHECK: stp x30, x20, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 + ; CHECK-NEXT: .cfi_offset w20, -8 ; CHECK-NEXT: .cfi_offset w30, -16 - ; CHECK-NEXT: mov x9, x1 - ; CHECK-NEXT: bl __hwasan_check_x0_2_short - ; CHECK-NEXT: ldr x30, [sp], #16 + ; CHECK-NEXT: mov x20, x1 + ; CHECK-NEXT: bl __hwasan_check_x0_2_short_v2 + ; CHECK-NEXT: ldp x30, x20, [sp], #16 ; CHECK-NEXT: ret call void @llvm.hwasan.check.memaccess.shortgranules(i8* %x1, i8* %x0, i32 2) ret i8* %x0 @@ -32,13 +33,13 @@ declare void @llvm.hwasan.check.memaccess(i8*, i8*, i32) declare void @llvm.hwasan.check.memaccess.shortgranules(i8*, i8*, i32) -; CHECK: .section .text.hot,"axG",@progbits,__hwasan_check_x0_2_short,comdat -; CHECK-NEXT: .type __hwasan_check_x0_2_short,@function -; CHECK-NEXT: .weak __hwasan_check_x0_2_short -; CHECK-NEXT: .hidden __hwasan_check_x0_2_short -; CHECK-NEXT: __hwasan_check_x0_2_short: +; CHECK: .section .text.hot,"axG",@progbits,__hwasan_check_x0_2_short_v2,comdat +; CHECK-NEXT: .type __hwasan_check_x0_2_short_v2,@function +; CHECK-NEXT: .weak __hwasan_check_x0_2_short_v2 +; CHECK-NEXT: .hidden __hwasan_check_x0_2_short_v2 +; CHECK-NEXT: __hwasan_check_x0_2_short_v2: ; CHECK-NEXT: ubfx x16, x0, #4, #52 -; CHECK-NEXT: ldrb w16, [x9, x16] +; CHECK-NEXT: ldrb w16, [x20, x16] ; CHECK-NEXT: cmp x16, x0, lsr #56 ; CHECK-NEXT: b.ne .Ltmp0 ; CHECK-NEXT: .Ltmp1: Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1123,9 +1123,12 @@ (outs), (ins GPR64noip:$ptr, 
i32imm:$accessinfo), [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; +} + +let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, + [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -302,7 +302,7 @@ std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" + utostr(AccessInfo); if (IsShort) - SymName += "_short"; + SymName += "_short_v2"; Sym = OutContext.getOrCreateSymbol(SymName); } @@ -354,13 +354,14 @@ .addImm(4) .addImm(55), *STI); - OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBroX) - .addReg(AArch64::W16) - .addReg(AArch64::X9) - .addReg(AArch64::X16) - .addImm(0) - .addImm(0), - *STI); + OutStreamer->emitInstruction( + MCInstBuilder(AArch64::LDRBBroX) + .addReg(AArch64::W16) + .addReg(IsShort ? 
AArch64::X20 : AArch64::X9) + .addReg(AArch64::X16) + .addImm(0) + .addImm(0), + *STI); OutStreamer->emitInstruction( MCInstBuilder(AArch64::SUBSXrs) .addReg(AArch64::XZR) Index: compiler-rt/test/hwasan/TestCases/register-dump-read.c =================================================================== --- compiler-rt/test/hwasan/TestCases/register-dump-read.c +++ compiler-rt/test/hwasan/TestCases/register-dump-read.c @@ -15,7 +15,7 @@ __hwasan_enable_allocator_tagging(); char * volatile x = (char*) malloc(10); asm volatile("mov x10, #0x2222\n" - "mov x20, #0x3333\n" + "mov x23, #0x3333\n" "mov x27, #0x4444\n"); return x[16]; @@ -35,8 +35,8 @@ // CHECK-SAME: x11{{[ ]+[0-9a-f]{16}$}} // CHECK-NEXT: x12{{[ ]+[0-9a-f]{16}[ ]}}x13{{[ ]+[0-9a-f]{16}[ ]}}x14{{[ ]+[0-9a-f]{16}[ ]}}x15{{[ ]+[0-9a-f]{16}$}} // CHECK-NEXT: x16{{[ ]+[0-9a-f]{16}[ ]}}x17{{[ ]+[0-9a-f]{16}[ ]}}x18{{[ ]+[0-9a-f]{16}[ ]}}x19{{[ ]+[0-9a-f]{16}$}} - // CHECK-NEXT: x20 0000000000003333 - // CHECK-SAME: x21{{[ ]+[0-9a-f]{16}[ ]}}x22{{[ ]+[0-9a-f]{16}[ ]}}x23{{[ ]+[0-9a-f]{16}$}} + // CHECK-NEXT: x20{{[ ]+[0-9a-f]{16}[ ]}}x21{{[ ]+[0-9a-f]{16}[ ]}}x22{{[ ]+[0-9a-f]{16}[ ]}} + // CHECK-SAME: x23 0000000000003333{{$}} // CHECK-NEXT: x24{{[ ]+[0-9a-f]{16}[ ]}}x25{{[ ]+[0-9a-f]{16}[ ]}}x26{{[ ]+[0-9a-f]{16}[ ]}} // CHECK-SAME: x27 0000000000004444 // CHECK-NEXT: x28{{[ ]+[0-9a-f]{16}[ ]}}x29{{[ ]+[0-9a-f]{16}[ ]}}x30{{[ ]+[0-9a-f]{16}$}} Index: clang/docs/HardwareAssistedAddressSanitizerDesign.rst =================================================================== --- clang/docs/HardwareAssistedAddressSanitizerDesign.rst +++ clang/docs/HardwareAssistedAddressSanitizerDesign.rst @@ -84,20 +84,20 @@ // clang -O2 --target=aarch64-linux-android30 -fsanitize=hwaddress -S -o - load.c [...] foo: - str x30, [sp, #-16]! 
- adrp x9, :got:__hwasan_shadow // load shadow address from GOT into x9 - ldr x9, [x9, :got_lo12:__hwasan_shadow] - bl __hwasan_check_x0_2_short // call outlined tag check - // (arguments: x0 = address, x9 = shadow base; + stp x30, x20, [sp, #-16]! + adrp x20, :got:__hwasan_shadow // load shadow address from GOT into x20 + ldr x20, [x20, :got_lo12:__hwasan_shadow] + bl __hwasan_check_x0_2_short_v2 // call outlined tag check + // (arguments: x0 = address, x20 = shadow base; // "2" encodes the access type and size) ldr w0, [x0] // inline load - ldr x30, [sp], #16 + ldp x30, x20, [sp], #16 ret [...] - __hwasan_check_x0_2_short: + __hwasan_check_x0_2_short_v2: ubfx x16, x0, #4, #52 // shadow offset - ldrb w16, [x9, x16] // load shadow tag + ldrb w16, [x20, x16] // load shadow tag cmp x16, x0, lsr #56 // extract address tag, compare with shadow tag b.ne .Ltmp0 // jump to short tag handler on mismatch .Ltmp1:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits