https://github.com/eddyz87 updated https://github.com/llvm/llvm-project/pull/84410
>From 503c1abc8dd63ec1500d1ed867a4bfefc1aed062 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Fri, 26 Jan 2024 04:18:32 +0200 Subject: [PATCH 1/4] [BPF] Add addr_space_cast BPF instruction This commit aims to support BPF arena kernel side feature [0]: - arena is a memory region accessible from both BPF program and userspace; - base pointers for this memory region differ between kernel and user spaces; - `dst_reg = addr_space_cast(src_reg, dst_addr_space, src_addr_space)` translates src_reg, a pointer in src_addr_space to dst_reg, equivalent pointer in dst_addr_space, {src,dst}_addr_space are immediate constants; - number 0 is assigned to kernel address space; - number 1 is assigned to user address space. On the LLVM side, the goal is to make load and store operations on arena pointers "transparent" for BPF programs: - assume that pointers with non-zero address space are pointers to arena memory; - assume that arena is identified by address space number; - assume that address space zero corresponds to kernel address space; - assume that every BPF-side load or store from arena is done via pointer in user address space, thus convert base pointers using `addr_space_cast(src_reg, 0, 1)`; Only load, store, cmpxchg and atomicrmw IR instructions are handled by this transformation. For example, the following C code: #define __as __attribute__((address_space(1))) void copy(int __as *from, int __as *to) { *to = *from; } Compiled to the following IR: define void @copy(ptr addrspace(1) %from, ptr addrspace(1) %to) { entry: %0 = load i32, ptr addrspace(1) %from, align 4 store i32 %0, ptr addrspace(1) %to, align 4 ret void } Is transformed to: %to2 = addrspacecast ptr addrspace(1) %to to ptr ;; ! %from1 = addrspacecast ptr addrspace(1) %from to ptr ;; ! 
%0 = load i32, ptr %from1, align 4, !tbaa !3 store i32 %0, ptr %to2, align 4, !tbaa !3 ret void And compiled as: r2 = addr_space_cast(r2, 0, 1) r1 = addr_space_cast(r1, 0, 1) r1 = *(u32 *)(r1 + 0) *(u32 *)(r2 + 0) = r1 exit Internally: - piggy-back `BPFCheckAndAdjustIR` pass to insert address space casts for base pointer of memory access instructions, when base pointer has non-zero address space; - modify `BPFInstrInfo.td` and `BPFIselLowering.cpp` to allow translation of `addrspacecast` instruction: - define new machine instruction: `ADDR_SPACE_CAST`; - define pattern to select `ADDR_SPACE_CAST` for `addrspacecast` ISD nodes. [0] https://lore.kernel.org/bpf/20240206220441.38311-1-alexei.starovoi...@gmail.com/ --- .../lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 1 + llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp | 104 ++++++++++++++++++ llvm/lib/Target/BPF/BPFInstrInfo.td | 29 +++++ .../test/CodeGen/BPF/addr-space-auto-casts.ll | 78 +++++++++++++ llvm/test/CodeGen/BPF/addr-space-cast.ll | 22 ++++ llvm/test/CodeGen/BPF/addr-space-gep-chain.ll | 25 +++++ llvm/test/CodeGen/BPF/addr-space-phi.ll | 53 +++++++++ .../test/CodeGen/BPF/assembler-disassembler.s | 7 ++ 8 files changed, 319 insertions(+) create mode 100644 llvm/test/CodeGen/BPF/addr-space-auto-casts.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-cast.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-gep-chain.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-phi.ll diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 0d1eef60c3b550..3145bc3d19f5dc 100644 --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -271,6 +271,7 @@ struct BPFOperand : public MCParsedAsmOperand { .Case("xchg32_32", true) .Case("cmpxchg_64", true) .Case("cmpxchg32_32", true) + .Case("addr_space_cast", true) .Default(false); } }; diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp 
b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp index 81effc9b1db46c..d39a6b57aafed4 100644 --- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp +++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp @@ -14,6 +14,8 @@ // optimizations are done and those builtins can be removed. // - remove llvm.bpf.getelementptr.and.load builtins. // - remove llvm.bpf.getelementptr.and.store builtins. +// - for loads and stores with base addresses from non-zero address space +// cast base address to zero address space (support for BPF arenas). // //===----------------------------------------------------------------------===// @@ -55,6 +57,7 @@ class BPFCheckAndAdjustIR final : public ModulePass { bool removeCompareBuiltin(Module &M); bool sinkMinMax(Module &M); bool removeGEPBuiltins(Module &M); + bool insertASpaceCasts(Module &M); }; } // End anonymous namespace @@ -416,11 +419,112 @@ bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) { return Changed; } +// Wrap ToWrap with cast to address space zero: +// - if ToWrap is a getelementptr, +// wrap its base pointer instead and return a copy; +// - if ToWrap is Instruction, insert address space cast +// immediately after ToWrap; +// - if ToWrap is not an Instruction (function parameter +// or a global value), insert address space cast at the +// beginning of the Function F; +// - use Cache to avoid inserting too many casts; +static Value *aspaceWrapValue(DenseMap<Value *, Value *> &Cache, Function *F, + Value *ToWrap) { + auto It = Cache.find(ToWrap); + if (It != Cache.end()) + return It->getSecond(); + + if (auto *GEP = dyn_cast<GetElementPtrInst>(ToWrap)) { + Value *Ptr = GEP->getPointerOperand(); + Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr); + auto *GEPTy = cast<PointerType>(GEP->getType()); + auto *NewGEP = GEP->clone(); + NewGEP->insertAfter(GEP); + NewGEP->mutateType(GEPTy->getPointerTo(0)); + NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr); + NewGEP->setName(GEP->getName()); + Cache[ToWrap] = NewGEP; + return 
NewGEP; + } + + IRBuilder IB(F->getContext()); + if (Instruction *InsnPtr = dyn_cast<Instruction>(ToWrap)) + IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef()); + else + IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt()); + auto *PtrTy = cast<PointerType>(ToWrap->getType()); + auto *ASZeroPtrTy = PtrTy->getPointerTo(0); + auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName()); + Cache[ToWrap] = ACast; + return ACast; +} + +// Wrap a pointer operand OpNum of instruction I +// with cast to address space zero +static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I, + unsigned OpNum) { + Value *OldOp = I->getOperand(OpNum); + if (OldOp->getType()->getPointerAddressSpace() == 0) + return; + + Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp); + I->setOperand(OpNum, NewOp); + // Check if there are any remaining users of old GEP, + // delete those w/o users + for (;;) { + auto *OldGEP = dyn_cast<GetElementPtrInst>(OldOp); + if (!OldGEP) + break; + if (!OldGEP->use_empty()) + break; + OldOp = OldGEP->getPointerOperand(); + OldGEP->eraseFromParent(); + } +} + +// Support for BPF arenas: +// - for each function in the module M, update pointer operand of +// each memory access instruction (load/store/cmpxchg/atomicrmw) +// by casting it from non-zero address space to zero address space, e.g: +// +// (load (ptr addrspace (N) %p) ...) 
+// -> (load (addrspacecast ptr addrspace (N) %p to ptr)) +// +// - assign section with name .arena.N for globals defined in +// non-zero address space N +bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) { + bool Changed = false; + for (Function &F : M) { + DenseMap<Value *, Value *> CastsCache; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + unsigned PtrOpNum; + + if (auto *LD = dyn_cast<LoadInst>(&I)) + PtrOpNum = LD->getPointerOperandIndex(); + else if (auto *ST = dyn_cast<StoreInst>(&I)) + PtrOpNum = ST->getPointerOperandIndex(); + else if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I)) + PtrOpNum = CmpXchg->getPointerOperandIndex(); + else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) + PtrOpNum = RMW->getPointerOperandIndex(); + else + continue; + + aspaceWrapOperand(CastsCache, &I, PtrOpNum); + } + } + Changed |= !CastsCache.empty(); + } + return Changed; +} + bool BPFCheckAndAdjustIR::adjustIR(Module &M) { bool Changed = removePassThroughBuiltin(M); Changed = removeCompareBuiltin(M) || Changed; Changed = sinkMinMax(M) || Changed; Changed = removeGEPBuiltins(M) || Changed; + Changed = insertASpaceCasts(M) || Changed; return Changed; } diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 82d34702310668..7198e9499bc32a 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -420,6 +420,35 @@ let Predicates = [BPFHasMovsx] in { } } +def ADDR_SPACE_CAST + : ALU_RR<BPF_ALU64, BPF_MOV, 64, + (outs GPR:$dst), + (ins GPR:$src, i64imm:$dst_as, i64imm:$src_as), + "$dst = addr_space_cast($src, $dst_as, $src_as)", + []> { + bits<64> dst_as; + bits<64> src_as; + + let Inst{47-32} = 1; + let Inst{31-16} = dst_as{15-0}; + let Inst{15-0} = src_as{15-0}; +} + +def SrcAddrSpace : SDNodeXForm<addrspacecast, [{ + return CurDAG->getTargetConstant( + cast<AddrSpaceCastSDNode>(N)->getSrcAddressSpace(), + SDLoc(N), MVT::i64); +}]>; + +def DstAddrSpace : SDNodeXForm<addrspacecast, [{ + 
return CurDAG->getTargetConstant( + cast<AddrSpaceCastSDNode>(N)->getDestAddressSpace(), + SDLoc(N), MVT::i64); +}]>; + +def : Pat<(addrspacecast:$this GPR:$src), + (ADDR_SPACE_CAST $src, (DstAddrSpace $this), (SrcAddrSpace $this))>; + def FI_ri : TYPE_LD_ST<BPF_IMM.Value, BPF_DW.Value, (outs GPR:$dst), diff --git a/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll new file mode 100644 index 00000000000000..08e11e861c71cb --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s + +define void @simple_store(ptr addrspace(272) %foo) { +; CHECK-LABEL: define void @simple_store( +; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr +; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16 +; CHECK-NEXT: store volatile i32 57005, ptr [[ADD_PTR2]], align 4 +; CHECK-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 12 +; CHECK-NEXT: store volatile i32 48879, ptr [[ADD_PTR13]], align 4 +; CHECK-NEXT: ret void +; +entry: + %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16 + store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 12 + store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4 + ret void +} + +define void @separate_addr_store(ptr addrspace(272) %foo, ptr addrspace(272) %bar) { +; CHECK-LABEL: define void @separate_addr_store( +; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]], ptr addrspace(272) [[BAR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BAR3:%.*]] = addrspacecast ptr addrspace(272) [[BAR]] to ptr +; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to 
ptr +; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16 +; CHECK-NEXT: store volatile i32 57005, ptr [[ADD_PTR2]], align 4 +; CHECK-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[BAR3]], i64 12 +; CHECK-NEXT: store volatile i32 48879, ptr [[ADD_PTR14]], align 4 +; CHECK-NEXT: ret void +; +entry: + %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16 + store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 12 + store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4 + ret void +} + +define i32 @simple_load(ptr addrspace(272) %foo) { +; CHECK-LABEL: define i32 @simple_load( +; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[FOO1]], align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %0 = load i32, ptr addrspace(272) %foo, align 4 + ret i32 %0 +} + +define { i32, i1 } @simple_cmpxchg(ptr addrspace(1) %i) { +; CHECK-LABEL: define { i32, i1 } @simple_cmpxchg( +; CHECK-SAME: ptr addrspace(1) [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I1:%.*]] = addrspacecast ptr addrspace(1) [[I]] to ptr +; CHECK-NEXT: [[A:%.*]] = cmpxchg ptr [[I1]], i32 7, i32 42 monotonic monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[A]] +; +entry: + %a = cmpxchg ptr addrspace(1) %i, i32 7, i32 42 monotonic monotonic, align 4 + ret { i32, i1 } %a +} + +define void @simple_atomicrmw(ptr addrspace(1) %p) { +; CHECK-LABEL: define void @simple_atomicrmw( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[A:%.*]] = atomicrmw add ptr [[P1]], i64 42 monotonic, align 8 +; CHECK-NEXT: ret void +; + %a = atomicrmw add ptr addrspace(1) %p, i64 42 monotonic, align 8 + ret void +} diff --git a/llvm/test/CodeGen/BPF/addr-space-cast.ll 
b/llvm/test/CodeGen/BPF/addr-space-cast.ll new file mode 100644 index 00000000000000..ad2860d8038ea7 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-cast.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -march=bpfel -mcpu=v4 -filetype=asm -show-mc-encoding < %s | FileCheck %s + +define ptr addrspace(1) @foo(ptr %p) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: r0 = addr_space_cast(r1, 1, 0) # encoding: [0xbf,0x10,0x01,0x00,0x00,0x00,0x01,0x00] +; CHECK-NEXT: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +entry: + %0 = addrspacecast ptr %p to ptr addrspace(1) + ret ptr addrspace(1) %0 +} + +define ptr @bar(ptr addrspace(1) %p) { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: r0 = addr_space_cast(r1, 0, 1) # encoding: [0xbf,0x10,0x01,0x00,0x01,0x00,0x00,0x00] +; CHECK-NEXT: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +entry: + %0 = addrspacecast ptr addrspace(1) %p to ptr + ret ptr %0 +} diff --git a/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll new file mode 100644 index 00000000000000..3ac85fb9b12662 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s + +define void @test(ptr addrspace(1) %p) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 8 +; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 16 +; CHECK-NEXT: [[C4:%.*]] = getelementptr inbounds i8, ptr [[B3]], i64 24 +; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds i8, ptr [[C4]], i64 32 +; 
CHECK-NEXT: store i64 11, ptr [[C4]], align 8 +; CHECK-NEXT: store i64 22, ptr [[D5]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = getelementptr inbounds i8, ptr addrspace(1) %p, i64 8 + %b = getelementptr inbounds i8, ptr addrspace(1) %a, i64 16 + %c = getelementptr inbounds i8, ptr addrspace(1) %b, i64 24 + %d = getelementptr inbounds i8, ptr addrspace(1) %c, i64 32 + store i64 11, ptr addrspace(1) %c, align 8 + store i64 22, ptr addrspace(1) %d, align 8 + ret void +} diff --git a/llvm/test/CodeGen/BPF/addr-space-phi.ll b/llvm/test/CodeGen/BPF/addr-space-phi.ll new file mode 100644 index 00000000000000..6d28b071f28086 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-phi.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s + +; Generated from the following C code: +; +; #define __uptr __attribute__((address_space(1))) +; +; extern int __uptr *magic1(); +; extern int __uptr *magic2(); +; +; void test(long i) { +; int __uptr *a; +; +; if (i > 42) +; a = magic1(); +; else +; a = magic2(); +; a[5] = 7; +; } +; +; Using the following command: +; +; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c + +define void @test(i64 noundef %i) { +; CHECK: if.end: +; CHECK-NEXT: [[A_0:%.*]] = phi ptr addrspace(1) +; CHECK-NEXT: [[A_01:%.*]] = addrspacecast ptr addrspace(1) [[A_0]] to ptr +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A_01]], i64 5 +; CHECK-NEXT: store i32 7, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i64 %i, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call ptr addrspace(1) @magic1() + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call ptr addrspace(1) @magic2() + br label %if.end + +if.end: ; preds = %if.else, %if.then + %a.0 = phi ptr addrspace(1) [ %call, %if.then ], [ %call1, %if.else ] + 
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a.0, i64 5 + store i32 7, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +declare ptr addrspace(1) @magic1(...) +declare ptr addrspace(1) @magic2(...) diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler.s b/llvm/test/CodeGen/BPF/assembler-disassembler.s index 2bc7421c2471c2..991d6edc683a30 100644 --- a/llvm/test/CodeGen/BPF/assembler-disassembler.s +++ b/llvm/test/CodeGen/BPF/assembler-disassembler.s @@ -289,3 +289,10 @@ r0 = *(u32*)skb[42] r0 = *(u8*)skb[r1] r0 = *(u16*)skb[r1] r0 = *(u32*)skb[r1] + +// CHECK: bf 10 01 00 01 00 00 00 r0 = addr_space_cast(r1, 0x0, 0x1) +// CHECK: bf 21 01 00 00 00 01 00 r1 = addr_space_cast(r2, 0x1, 0x0) +// CHECK: bf 43 01 00 2a 00 07 00 r3 = addr_space_cast(r4, 0x7, 0x2a) +r0 = addr_space_cast(r1, 0, 1) +r1 = addr_space_cast(r2, 1, 0) +r3 = addr_space_cast(r4, 7, 42) >From a1d3ff0fc5b81ddb4cd355aaa90b18ee69b425bd Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Tue, 30 Jan 2024 22:20:43 +0200 Subject: [PATCH 2/4] [BPF] Merge global variables with address space to .arena.N section Make it so that all globals within same address space reside in section with name ".arena.N", where N is number of the address space. E.g. for the following C program: ```c __as const char a[2] = {1,2}; __as char b[2] = {3,4}; __as char c[2]; ... ``` Generate the following layout: ``` $ clang -O2 --target=bpf t.c -c -o - \ | llvm-readelf --sections --symbols - ... Section Headers: [Nr] Name Type Address Off Size ES Flg Lk Inf Al ... [ 4] .arena.272 PROGBITS 0000000000000000 0000e8 000018 00 WA 0 0 4 ... Symbol table '.symtab' contains 8 entries: Num: Value Size Type Bind Vis Ndx Name ... 3: 0000000000000000 8 OBJECT GLOBAL DEFAULT 4 a 4: 0000000000000008 8 OBJECT GLOBAL DEFAULT 4 b 5: 0000000000000010 8 OBJECT GLOBAL DEFAULT 4 c ... 
^^^ Note section index ``` --- llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp | 12 ++++++++ llvm/test/CodeGen/BPF/addr-space-globals.ll | 30 ++++++++++++++++++++ llvm/test/CodeGen/BPF/addr-space-globals2.ll | 25 ++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals2.ll diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp index d39a6b57aafed4..edd59aaa6d01d2 100644 --- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp +++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp @@ -516,6 +516,18 @@ bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) { } Changed |= !CastsCache.empty(); } + // Merge all globals within same address space into single + // .arena.<addr space no> section + for (GlobalVariable &G : M.globals()) { + if (G.getAddressSpace() == 0 || G.hasSection()) + continue; + SmallString<16> SecName; + raw_svector_ostream OS(SecName); + OS << ".arena." 
<< G.getAddressSpace(); + G.setSection(SecName); + // Prevent having separate section for constants + G.setConstant(false); + } return Changed; } diff --git a/llvm/test/CodeGen/BPF/addr-space-globals.ll b/llvm/test/CodeGen/BPF/addr-space-globals.ll new file mode 100644 index 00000000000000..878ba0dfce6cd1 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-globals.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s + +; Generated from the following C code: +; +; #define __as __attribute__((address_space(272))) +; __as const char a[2] = {1,2}; +; __as char b[2] = {3,4}; +; __as char c[2]; +; +; Using the following command: +; +; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c + + +@a = dso_local local_unnamed_addr addrspace(272) constant [2 x i8] [i8 1, i8 2], align 1 +@b = dso_local local_unnamed_addr addrspace(272) global [2 x i8] [i8 3, i8 4], align 1 +@c = dso_local local_unnamed_addr addrspace(272) global [2 x i8] zeroinitializer, align 1 + +; Verify that a,b,c reside in the same section + +; CHECK: .section .arena.272,"aw",@progbits +; CHECK-NOT: .section +; CHECK: .globl a +; CHECK: .ascii "\001\002" +; CHECK-NOT: .section +; CHECK: .globl b +; CHECK: .ascii "\003\004" +; CHECK-NOT: .section +; CHECK: .globl c +; CHECK: .zero 2 diff --git a/llvm/test/CodeGen/BPF/addr-space-globals2.ll b/llvm/test/CodeGen/BPF/addr-space-globals2.ll new file mode 100644 index 00000000000000..d1e2318948751e --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-globals2.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s + +; Generated from the following C code: +; +; __attribute__((address_space(1))) char a[2] = {1,2}; +; __attribute__((address_space(2))) char b[2] = {3,4}; +; +; Using the following command: +; +; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c + +@a = dso_local local_unnamed_addr addrspace(1) global [2 x i8] [i8 1, i8 2], align 1 +@b = dso_local local_unnamed_addr addrspace(2) global [2 x i8] [i8 3, i8 4], align 1 + 
+; Verify that a,b reside in separate sections + +; CHECK: .section .arena.1,"aw",@progbits +; CHECK-NOT: .section +; CHECK: .globl a +; CHECK: .ascii "\001\002" + +; CHECK: .section .arena.2,"aw",@progbits +; CHECK-NOT: .section +; CHECK: .globl b +; CHECK: .ascii "\003\004" >From ceeb0f570ffcf62994e112399176a29b18a6b434 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Fri, 2 Feb 2024 21:05:31 +0200 Subject: [PATCH 3/4] [BPF] Convert 'addrcast M->N -> GEP -> addrcast N->M' to just GEP For BPF GEP would adjust pointer using same offset in any address space, thus transformation of form: %inner = addrspacecast N->M %ptr %gep = getelementptr %inner, ... %outer = addrspacecast M->N %gep to just: %gep = getelementptr %ptr, ... is valid. Applying such transformation helps with C patterns that use e.g. (void *) casts to offsets w/o actual memory access: #define container_of(ptr, type, member) \ ({ \ void __arena *__mptr = (void *)(ptr); \ ((type *)(__mptr - offsetof(type, member))); \ }) (Note the address space cast on first body line) --- llvm/lib/Target/BPF/BPF.h | 8 ++ .../Target/BPF/BPFASpaceCastSimplifyPass.cpp | 92 +++++++++++++++++++ llvm/lib/Target/BPF/BPFTargetMachine.cpp | 5 + llvm/lib/Target/BPF/CMakeLists.txt | 1 + .../test/CodeGen/BPF/addr-space-simplify-1.ll | 19 ++++ .../test/CodeGen/BPF/addr-space-simplify-2.ll | 21 +++++ .../test/CodeGen/BPF/addr-space-simplify-3.ll | 26 ++++++ .../test/CodeGen/BPF/addr-space-simplify-4.ll | 21 +++++ .../test/CodeGen/BPF/addr-space-simplify-5.ll | 25 +++++ 9 files changed, 218 insertions(+) create mode 100644 llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-1.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-2.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-3.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-4.ll create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-5.ll diff --git 
a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h index 5c77d183e1ef3d..bbdbdbbde53228 100644 --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -66,6 +66,14 @@ class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> { static bool isRequired() { return true; } }; +class BPFASpaceCastSimplifyPass + : public PassInfoMixin<BPFASpaceCastSimplifyPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } +}; + class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); diff --git a/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp new file mode 100644 index 00000000000000..f87b299bbba658 --- /dev/null +++ b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp @@ -0,0 +1,92 @@ +//===-- BPFASpaceCastSimplifyPass.cpp - BPF addrspacecast simplifications --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BPF.h" +#include <optional> + +#define DEBUG_TYPE "bpf-aspace-simplify" + +using namespace llvm; + +namespace { + +struct CastGEPCast { + AddrSpaceCastInst *OuterCast; + + // Match chain of instructions: + // %inner = addrspacecast N->M + // %gep = getelementptr %inner, ... + // %outer = addrspacecast M->N %gep + // Where I is %outer. 
+ static std::optional<CastGEPCast> match(Value *I) { + auto *OuterCast = dyn_cast<AddrSpaceCastInst>(I); + if (!OuterCast) + return std::nullopt; + auto *GEP = dyn_cast<GetElementPtrInst>(OuterCast->getPointerOperand()); + if (!GEP) + return std::nullopt; + auto *InnerCast = dyn_cast<AddrSpaceCastInst>(GEP->getPointerOperand()); + if (!InnerCast) + return std::nullopt; + if (InnerCast->getSrcAddressSpace() != OuterCast->getDestAddressSpace()) + return std::nullopt; + if (InnerCast->getDestAddressSpace() != OuterCast->getSrcAddressSpace()) + return std::nullopt; + return CastGEPCast{OuterCast}; + } + + static PointerType *changeAddressSpace(PointerType *Ty, unsigned AS) { + return Ty->get(Ty->getContext(), AS); + } + + // Assuming match(this->OuterCast) is true, convert: + // (addrspacecast M->N (getelementptr (addrspacecast N->M ptr) ...)) + // To: + // (getelementptr ptr ...) + GetElementPtrInst *rewrite() { + auto *GEP = cast<GetElementPtrInst>(OuterCast->getPointerOperand()); + auto *InnerCast = cast<AddrSpaceCastInst>(GEP->getPointerOperand()); + unsigned AS = OuterCast->getDestAddressSpace(); + auto *NewGEP = cast<GetElementPtrInst>(GEP->clone()); + NewGEP->setName(GEP->getName()); + NewGEP->insertAfter(OuterCast); + NewGEP->setOperand(0, InnerCast->getPointerOperand()); + auto *GEPTy = cast<PointerType>(GEP->getType()); + NewGEP->mutateType(changeAddressSpace(GEPTy, AS)); + OuterCast->replaceAllUsesWith(NewGEP); + OuterCast->eraseFromParent(); + if (GEP->use_empty()) + GEP->eraseFromParent(); + if (InnerCast->use_empty()) + InnerCast->eraseFromParent(); + return NewGEP; + } +}; + +} // anonymous namespace + +PreservedAnalyses BPFASpaceCastSimplifyPass::run(Function &F, + FunctionAnalysisManager &AM) { + SmallVector<CastGEPCast, 16> WorkList; + bool Changed = false; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) + if (auto It = CastGEPCast::match(&I)) + WorkList.push_back(It.value()); + Changed |= !WorkList.empty(); + + while (!WorkList.empty()) { + 
CastGEPCast InsnChain = WorkList.pop_back_val(); + GetElementPtrInst *NewGEP = InsnChain.rewrite(); + for (User *U : NewGEP->users()) + if (auto It = CastGEPCast::match(U)) + WorkList.push_back(It.value()); + } + } + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 08ac4b25540f70..5f26bec2e390c8 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -121,6 +121,10 @@ void BPFTargetMachine::registerPassBuilderCallbacks( FPM.addPass(BPFPreserveStaticOffsetPass(false)); return true; } + if (PassName == "bpf-aspace-simplify") { + FPM.addPass(BPFASpaceCastSimplifyPass()); + return true; + } return false; }); PB.registerPipelineStartEPCallback( @@ -135,6 +139,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks( PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM, OptimizationLevel Level) { FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))); + FPM.addPass(BPFASpaceCastSimplifyPass()); }); PB.registerScalarOptimizerLateEPCallback( [=](FunctionPassManager &FPM, OptimizationLevel Level) { diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index d88e7ade40b9a0..cb21ed03a86c1e 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(BPFCodeGen BPFAbstractMemberAccess.cpp BPFAdjustOpt.cpp BPFAsmPrinter.cpp + BPFASpaceCastSimplifyPass.cpp BPFCheckAndAdjustIR.cpp BPFFrameLowering.cpp BPFInstrInfo.cpp diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll new file mode 100644 index 00000000000000..32d67284d1c1b7 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt 
-passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass removes unnecessary (for BPF) +; address space casts for cast M->N -> GEP -> cast N->M chain. + +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(1) + ret ptr addrspace(1) %c +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll new file mode 100644 index 00000000000000..a2965554a97330 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass does not change +; chain 'cast M->N -> GEP -> cast N->K'. 
+ +define dso_local ptr addrspace(2) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(2) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(2) +; CHECK-NEXT: ret ptr addrspace(2) [[C]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(2) + ret ptr addrspace(2) %c +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll new file mode 100644 index 00000000000000..a7736c462b44b3 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that when bpf-aspace-simplify pass modifies chain +; 'cast M->N -> GEP -> cast N->M' it does not remove GEP, +; when that GEP is used by some other instruction. 
+ +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: call void @sink(ptr [[B]]) +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(1) + call void @sink(ptr %b) + ret ptr addrspace(1) %c +} + +declare dso_local void @sink(ptr) diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll new file mode 100644 index 00000000000000..b2c384bbb6abd3 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass simplifies chain +; 'cast K->M -> cast M->N -> GEP -> cast N->M -> cast M->K'. 
+ +define dso_local ptr addrspace(2) @test (ptr addrspace(2) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(2) @test( +; CHECK-SAME: ptr addrspace(2) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C12:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(2) [[C12]] +; + entry: + %a = addrspacecast ptr addrspace(2) %p to ptr addrspace(1) + %b = addrspacecast ptr addrspace(1) %a to ptr + %c = getelementptr inbounds i8, ptr %b, i64 8 + %d = addrspacecast ptr %c to ptr addrspace(1) + %e = addrspacecast ptr addrspace (1) %d to ptr addrspace(2) + ret ptr addrspace(2) %e +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll new file mode 100644 index 00000000000000..b62d25384d9583 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass removes unnecessary (for BPF) +; address space casts for cast M->N -> GEP -> cast N->M chain, +; where chain is split between several BBs. 
+ +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; +entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + br label %exit + +exit: + %c = addrspacecast ptr %b to ptr addrspace(1) + ret ptr addrspace(1) %c +} >From 98663e28aa15512644b1f4ec54f06300a77371ce Mon Sep 17 00:00:00 2001 From: Eduard Zingerman <eddy...@gmail.com> Date: Tue, 30 Jan 2024 04:11:36 +0200 Subject: [PATCH 4/4] [BPF][CLANG] Front-end support for __BPF_FEATURE_ARENA_CAST macro `__BPF_FEATURE_ARENA_CAST` macro is defined if compiler supports emission of `cast_kern` and `cast_user` BPF instructions. --- clang/lib/Basic/Targets/BPF.cpp | 3 +++ clang/test/Preprocessor/bpf-predefined-macros.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp index e3fbbb720d0694..26a54f631fcfc4 100644 --- a/clang/lib/Basic/Targets/BPF.cpp +++ b/clang/lib/Basic/Targets/BPF.cpp @@ -35,6 +35,9 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__BPF_CPU_VERSION__", "0"); return; } + + Builder.defineMacro("__BPF_FEATURE_ARENA_CAST"); + if (CPU.empty() || CPU == "generic" || CPU == "v1") { Builder.defineMacro("__BPF_CPU_VERSION__", "1"); return; diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c index ff4d00ac3bcfcc..fea24d1ea0ff7b 100644 --- a/clang/test/Preprocessor/bpf-predefined-macros.c +++ b/clang/test/Preprocessor/bpf-predefined-macros.c @@ -61,6 +61,9 @@ int r; #ifdef __BPF_FEATURE_ST int s; #endif +#ifdef __BPF_FEATURE_ARENA_CAST +int t; +#endif // CHECK: int b; // CHECK: int c; @@ 
-90,6 +93,11 @@ int s; // CPU_V4: int r; // CPU_V4: int s; +// CPU_V1: int t; +// CPU_V2: int t; +// CPU_V3: int t; +// CPU_V4: int t; + // CPU_GENERIC: int g; // CPU_PROBE: int f; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits