https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/79191
>From 3c091285f7f437c7284eb70d8735b0ad7e123804 Mon Sep 17 00:00:00 2001
From: Rose <83477269+ataridre...@users.noreply.github.com>
Date: Tue, 23 Jan 2024 13:59:05 -0500
Subject: [PATCH] [X86] Use unaligned atomic loads and stores

The backend supports them now, so we can use them.
---
 clang/lib/CodeGen/CGObjC.cpp                  |  5 +-
 clang/test/CodeGenObjC/objc_copyStruct.m      |  3 +-
 clang/test/CodeGenObjC/property-aggregate.m   | 15 +--
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  1 +
 llvm/test/CodeGen/X86/unaligned-atomic-ops.ll | 92 +++++++++++++++++++
 5 files changed, 99 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/unaligned-atomic-ops.ll

diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 03fc0ec7ff54e1..debfc84f49e484 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -846,8 +846,9 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar,
 /// accesses. They don't have to be fast, just faster than a function
 /// call and a mutex.
 static bool hasUnalignedAtomics(llvm::Triple::ArchType arch) {
-  // FIXME: Allow unaligned atomic load/store on x86. (It is not
-  // currently supported by the backend.)
+  // x86 and x86_64 are currently the only architectures known to support this.
+  if (arch == llvm::Triple::x86 || arch == llvm::Triple::x86_64)
+    return true;
   return false;
 }

diff --git a/clang/test/CodeGenObjC/objc_copyStruct.m b/clang/test/CodeGenObjC/objc_copyStruct.m
index 7bbad866e2b1fb..8e52815a308abc 100644
--- a/clang/test/CodeGenObjC/objc_copyStruct.m
+++ b/clang/test/CodeGenObjC/objc_copyStruct.m
@@ -2,7 +2,7 @@
 // RUN: %clang -target x86_64-apple-ios -fobjc-runtime=ios -Wno-objc-root-class -S -o - -emit-llvm %s | FileCheck %s

 struct S {
-  float f, g;
+  double f, g;
 };

 @interface I
@@ -13,4 +13,3 @@ @implementation I
 @end

 // CHECK: declare {{.*}}void @objc_copyStruct(ptr, ptr, i64, i1, i1)
-
diff --git a/clang/test/CodeGenObjC/property-aggregate.m b/clang/test/CodeGenObjC/property-aggregate.m
index f4211b6b62bd50..4c8c8893f920f4 100644
--- a/clang/test/CodeGenObjC/property-aggregate.m
+++ b/clang/test/CodeGenObjC/property-aggregate.m
@@ -1,13 +1,8 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s

-// This structure's size is not a power of two, so the property does
-// not get native atomics, even though x86-64 can do unaligned atomics
-// with a lock prefix.
 struct s3 { char c[3]; };

 // This structure's size is, so it does, because it can.
-// FIXME: But we don't at the moment; the backend doesn't know how to generate
-// correct code.
 struct s4 { char c[4]; };

 @interface Test0
@@ -18,14 +13,8 @@ @implementation Test0
 @synthesize s3, s4;
 @end

-// CHECK: define internal i24 @"\01-[Test0 s3]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal void @"\01-[Test0 setS3:]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal i32 @"\01-[Test0 s4]"(
-// CHECK: call void @objc_copyStruct
-// CHECK: define internal void @"\01-[Test0 setS4:]"(
-// CHECK: call void @objc_copyStruct
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5cc2803b280879..eacaf6bbd2296a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -107,6 +107,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setSchedulingPreference(Sched::RegPressure);
   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
+  setSupportsUnalignedAtomics(true);

   // Bypass expensive divides and use cheaper ones.
   if (TM.getOptLevel() >= CodeGenOptLevel::Default) {
diff --git a/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
new file mode 100644
index 00000000000000..9e5173ff2b37e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-atomic-ops.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s
+
+; This verifies that the backend can handle unaligned atomic loads and stores.
+;
+; In the past, unaligned atomic loads and stores would always hit an
+; assertion inside the SelectionDAGBuilder.
+
+%AtomicI16 = type { %CellI16, [0 x i8] }
+%CellI16 = type { i16, [0 x i8] }
+
+; CHECK-LABEL: foo
+; CHECK: ret
+define void @foo(%AtomicI16* %self) {
+; I386-LABEL: foo:
+; I386:       ## %bb.0: ## %start
+; I386-NEXT:    pushl %esi
+; I386-NEXT:    .cfi_def_cfa_offset 8
+; I386-NEXT:    subl $24, %esp
+; I386-NEXT:    .cfi_def_cfa_offset 32
+; I386-NEXT:    .cfi_offset %esi, -8
+; I386-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; I386-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl $2, (%esp)
+; I386-NEXT:    calll ___atomic_load
+; I386-NEXT:    movw $5, {{[0-9]+}}(%esp)
+; I386-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl $5, {{[0-9]+}}(%esp)
+; I386-NEXT:    movl $2, (%esp)
+; I386-NEXT:    calll ___atomic_store
+; I386-NEXT:    addl $24, %esp
+; I386-NEXT:    popl %esi
+; I386-NEXT:    retl
+;
+; CORE2-LABEL: foo:
+; CORE2:       ## %bb.0: ## %start
+; CORE2-NEXT:    pushq %rbx
+; CORE2-NEXT:    .cfi_def_cfa_offset 16
+; CORE2-NEXT:    subq $16, %rsp
+; CORE2-NEXT:    .cfi_def_cfa_offset 32
+; CORE2-NEXT:    .cfi_offset %rbx, -16
+; CORE2-NEXT:    movq %rdi, %rbx
+; CORE2-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT:    movl $2, %edi
+; CORE2-NEXT:    movq %rbx, %rsi
+; CORE2-NEXT:    movl $5, %ecx
+; CORE2-NEXT:    callq ___atomic_load
+; CORE2-NEXT:    movw $5, {{[0-9]+}}(%rsp)
+; CORE2-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; CORE2-NEXT:    movl $2, %edi
+; CORE2-NEXT:    movq %rbx, %rsi
+; CORE2-NEXT:    movl $5, %ecx
+; CORE2-NEXT:    callq ___atomic_store
+; CORE2-NEXT:    addq $16, %rsp
+; CORE2-NEXT:    popq %rbx
+; CORE2-NEXT:    retq
+;
+; COREI7-LABEL: foo:
+; COREI7:       ## %bb.0: ## %start
+; COREI7-NEXT:    pushq %rbx
+; COREI7-NEXT:    .cfi_def_cfa_offset 16
+; COREI7-NEXT:    subq $16, %rsp
+; COREI7-NEXT:    .cfi_def_cfa_offset 32
+; COREI7-NEXT:    .cfi_offset %rbx, -16
+; COREI7-NEXT:    movq %rdi, %rbx
+; COREI7-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT:    movl $2, %edi
+; COREI7-NEXT:    movq %rbx, %rsi
+; COREI7-NEXT:    movl $5, %ecx
+; COREI7-NEXT:    callq ___atomic_load
+; COREI7-NEXT:    movw $5, {{[0-9]+}}(%rsp)
+; COREI7-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; COREI7-NEXT:    movl $2, %edi
+; COREI7-NEXT:    movq %rbx, %rsi
+; COREI7-NEXT:    movl $5, %ecx
+; COREI7-NEXT:    callq ___atomic_store
+; COREI7-NEXT:    addq $16, %rsp
+; COREI7-NEXT:    popq %rbx
+; COREI7-NEXT:    retq
+start:
+  %a = getelementptr inbounds %AtomicI16, %AtomicI16* %self, i16 0, i32 0, i32 0
+  load atomic i16, i16* %a seq_cst, align 1
+  store atomic i16 5, i16* %a seq_cst, align 1
+  ret void
+}
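For context on the CGObjC.cpp hunk: hasUnalignedAtomics() feeds the property-implementation strategy in CGObjC.cpp, which decides whether an atomic, struct-typed Objective-C property is copied through the objc_copyStruct runtime call or with native atomic loads and stores. Below is a minimal Objective-C sketch of the affected pattern; the type and class names are hypothetical, and it simply mirrors clang/test/CodeGenObjC/property-aggregate.m rather than being part of the patch.

// sketch.m -- minimal sketch, hypothetical names; mirrors property-aggregate.m.
// Build with: clang -S -emit-llvm -Wno-objc-root-class sketch.m
struct Pair { char c[4]; };    // power-of-two size, but only 1-byte alignment

@interface Holder
@property struct Pair pair;    // atomic by default, so CGObjC must pick a copy strategy
@end

@implementation Holder
@synthesize pair;
@end

With hasUnalignedAtomics() returning true for x86 and x86_64, the synthesized accessors for a power-of-two-sized aggregate like this are expected to use native atomic loads and stores instead of the objc_copyStruct call that the old property-aggregate.m CHECK lines pinned down.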