Author: Bill Wendling
Date: 2023-01-09T12:09:00-08:00
New Revision: 74d3ba1af5c09b85331c90105c461484762ee3e4
URL:
https://github.com/llvm/llvm-project/commit/74d3ba1af5c09b85331c90105c461484762ee3e4
DIFF:
https://github.com/llvm/llvm-project/commit/74d3ba1af5c09b85331c90105c461484762ee3e4.diff
LOG: [X86] Don't zero out %eax if both %al and %ah are used
The iterator over super and sub registers doesn't include both 8-bit
registers in its list. So if both registers are used and only one of
them is live on return, then we need to make sure that the other 8-bit
register is also marked as live and not zeroed out.
Reviewed By: nickdesaulniers
Differential Revision: https://reviews.llvm.org/D139679
(cherry picked from commit 14d4cddc5506fb0fd3c4ac556b4edd970aa151eb)
Added:
llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
Modified:
llvm/lib/CodeGen/PrologEpilogInserter.cpp
Removed:
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 85d051cfdbe71..a8d40edd88d3a 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1237,7 +1237,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
if (!MO.isReg())
continue;
-for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+MCRegister Reg = MO.getReg();
+
+// This picks up sibling registers (e.g. %al -> %ah).
+for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
+ RegsToZero.reset(*Unit);
+
+for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
}
}
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
new file mode 100644
index 0000000000000..33e501ca8503c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386
+;
+; Make sure we don't zero out %eax when both %ah and %al are used.
+;
+; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766
+
+%struct.maple_subtree_state = type { ptr }
+
+@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4
+@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4
+@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1
+
+define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind {
+; I386-LABEL: test1:
+; I386: # %bb.0:
+; I386-NEXT:pushl %ebx
+; I386-NEXT:subl $24, %esp
+; I386-NEXT:movl {{[0-9]+}}(%esp), %eax
+; I386-NEXT:movl (%eax), %eax
+; I386-NEXT:movzbl (%eax), %ebx
+; I386-NEXT:calll bar
+; I386-NEXT:testb %al, %al
+; I386-NEXT:# implicit-def: $al
+; I386-NEXT:# kill: killed $al
+; I386-NEXT:je .LBB0_6
+; I386-NEXT: # %bb.1:
+; I386-NEXT:cmpl $0, mas_data_end_type
+; I386-NEXT:je .LBB0_3
+; I386-NEXT: # %bb.2:
+; I386-NEXT:movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax
+; I386-NEXT:movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT:jmp .LBB0_6
+; I386-NEXT: .LBB0_3:
+; I386-NEXT:movb mt_pivots_0, %ah
+; I386-NEXT:movb %ah, %al
+; I386-NEXT:decb %al
+; I386-NEXT:movl mas_data_end___trans_tmp_2, %ecx
+; I386-NEXT:movsbl %al, %edx
+; I386-NEXT:cmpl $0, (%ecx,%edx,4)
+; I386-NEXT:je .LBB0_5
+; I386-NEXT: # %bb.4:
+; I386-NEXT:movb %al, %ah
+; I386-NEXT: .LBB0_5:
+; I386-NEXT:movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT: .LBB0_6:
+; I386-NEXT:movb mt_slots_0, %bh
+; I386-NEXT:leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT:movl %eax, (%esp)
+; I386-NEXT:calll baz
+; I386-NEXT:subl $4, %esp
+; I386-NEXT:cmpb %bh, %bl
+; I386-NEXT:jae .LBB0_8
+; I386-NEXT: # %bb.7:
+; I386-NEXT:movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; I386-NEXT:movl %eax, (%esp)
+; I386-NEXT:calll gaz
+; I386-NEXT: .LBB0_8:
+; I386-NEXT:movb $1, %al
+; I386-NEXT:addl $24, %esp
+; I386-NEXT:popl %ebx
+; I386-NEXT:xorl %ecx, %ecx
+; I386-NEXT:xorl %edx, %edx
+; I386-NEXT:retl
+ %2 = alloca %struct.maple_subtree_state, align 4
+ %3 = load ptr, ptr %0, align 4
+ %4 = load i8, ptr %3, align 1
+ %5 = tail call zeroext i1 @bar()
+ br i1 %5, label %6, label %20
+
+6:; preds = %1
+ %7 = load i32, ptr @mas_data_end_type, align 4
+ %8 = icmp eq i32 %7, 0
+ br i1 %8, label %11, label %9
+
+9: