yonghong-song updated this revision to Diff 308884. yonghong-song edited the summary of this revision. yonghong-song added a comment.
- Remove -mcpu=v4. - For the new instructions (except xadd), 32-bit operations are supported only in alu32 mode. I chose this approach since we have had -mcpu=v3 for a while, which has alu32 as the default. We really want to promote alu32 mode. A new kernel that supports atomic ops definitely supports alu32 well. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D72184/new/ https://reviews.llvm.org/D72184 Files: llvm/lib/Target/BPF/BPFInstrFormats.td llvm/lib/Target/BPF/BPFInstrInfo.td llvm/lib/Target/BPF/BPFMIChecking.cpp llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp llvm/test/CodeGen/BPF/atomics.ll llvm/test/CodeGen/BPF/atomics_2.ll llvm/test/CodeGen/BPF/xadd.ll
Index: llvm/test/CodeGen/BPF/xadd.ll =================================================================== --- llvm/test/CodeGen/BPF/xadd.ll +++ llvm/test/CodeGen/BPF/xadd.ll @@ -1,7 +1,5 @@ ; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s ; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s ; This file is generated with the source command and source ; $ clang -target bpf -O2 -g -S -emit-llvm t.c Index: llvm/test/CodeGen/BPF/atomics_2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/BPF/atomics_2.ll @@ -0,0 +1,254 @@ +; RUN: llc < %s -march=bpfel -mcpu=v3 -verify-machineinstrs -show-mc-encoding | FileCheck %s +; +; Source: +; int test_load_add_32(int *p, int v) { +; return __sync_fetch_and_add(p, v); +; } +; int test_load_add_64(long *p, long v) { +; return __sync_fetch_and_add(p, v); +; } +; int test_load_sub_32(int *p, int v) { +; return __sync_fetch_and_sub(p, v); +; } +; int test_load_sub_64(long *p, long v) { +; return __sync_fetch_and_sub(p, v); +; } +; // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html +; // __sync_lock_test_and_set() actually does atomic xchg and returns +; // old contents. 
+; int test_xchg_32(int *p, int v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_xchg_64(long *p, long v) { +; return __sync_lock_test_and_set(p, v); +; } +; int test_cas_32(int *p, int old, int new) { +; return __sync_val_compare_and_swap(p, old, new); +; } +; long test_cas_64(long *p, long old, long new) { +; return __sync_val_compare_and_swap(p, old, new); +; } +; int test_load_and_32(int *p, int v) { +; return __sync_fetch_and_and(p, v); +; } +; int test_load_and_64(long *p, long v) { +; return __sync_fetch_and_and(p, v); +; } +; int test_load_or_32(int *p, int v) { +; return __sync_fetch_and_or(p, v); +; } +; int test_load_or_64(long *p, long v) { +; return __sync_fetch_and_or(p, v); +; } +; int test_load_xor_32(int *p, int v) { +; return __sync_fetch_and_xor(p, v); +; } +; int test_load_xor_64(long *p, long v) { +; return __sync_fetch_and_xor(p, v); +; } +; int test_atomic_xor_32(int *p, int v) { +; __sync_fetch_and_xor(p, v); +; return 0; +; } +; int test_atomic_xor_64(long *p, long v) { +; __sync_fetch_and_xor(p, v); +; return 0; +; } +; int test_atomic_and_64(long *p, long v) { +; __sync_fetch_and_and(p, v); +; return 0; +; } +; int test_atomic_or_64(long *p, long v) { +; __sync_fetch_and_or(p, v); +; return 0; +; } + +; CHECK-LABEL: test_load_add_32 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw add i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_add_64 +; CHECK: r0 = r2 +; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw add i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_load_sub_32 +; CHECK: w0 = w2 +; 
CHECK: w0 = -w0 +; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw sub i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_sub_64 +; CHECK: r0 = r2 +; CHECK: r0 = -r0 +; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00] +define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw sub i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_xchg_32 +; CHECK: w0 = w2 +; CHECK: w0 = xchg32_32(r1 + 0, w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_xchg_64 +; CHECK: r0 = r2 +; CHECK: r0 = xchg_64(r1 + 0, r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00] +define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xchg i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_cas_32 +; CHECK: w0 = w2 +; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3) +; CHECK: encoding: [0xc3,0x31,0x00,0x00,0xf1,0x00,0x00,0x00] +define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr { +entry: + %0 = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst + %1 = extractvalue { i32, i1 } %0, 0 + ret i32 %1 +} + +; CHECK-LABEL: test_cas_64 +; CHECK: r0 = r2 +; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3) +; CHECK: encoding: [0xdb,0x31,0x00,0x00,0xf1,0x00,0x00,0x00] +define dso_local i64 @test_cas_64(i64* nocapture %p, i64 %old, i64 %new) local_unnamed_addr { +entry: + %0 = cmpxchg i64* %p, i64 %old, i64 %new seq_cst seq_cst + %1 = 
extractvalue { i64, i1 } %0, 0 + ret i64 %1 +} + +; CHECK-LABEL: test_load_and_32 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x51,0x00,0x00,0x00] +define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw and i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_and_64 +; CHECK: r0 = r2 +; CHECK: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x51,0x00,0x00,0x00] +define dso_local i32 @test_load_and_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw and i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_load_or_32 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_or((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x41,0x00,0x00,0x00] +define dso_local i32 @test_load_or_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw or i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_or_64 +; CHECK: r0 = r2 +; CHECK: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x41,0x00,0x00,0x00] +define dso_local i32 @test_load_or_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw or i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_load_xor_32 +; CHECK: w0 = w2 +; CHECK: w0 = atomic_fetch_xor((u32 *)(r1 + 0), w0) +; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xa1,0x00,0x00,0x00] +define dso_local i32 @test_load_xor_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xor i32* %p, i32 %v seq_cst + ret i32 %0 +} + +; CHECK-LABEL: test_load_xor_64 +; CHECK: r0 = r2 +; CHECK: r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0) +; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xa1,0x00,0x00,0x00] +define dso_local i32 @test_load_xor_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 
= atomicrmw xor i64* %p, i64 %v seq_cst + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: test_atomic_xor_32 +; CHECK: lock *(u32 *)(r1 + 0) ^= w2 +; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00] +; CHECK: w0 = 0 +define dso_local i32 @test_atomic_xor_32(i32* nocapture %p, i32 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xor i32* %p, i32 %v seq_cst + ret i32 0 +} + +; CHECK-LABEL: test_atomic_xor_64 +; CHECK: lock *(u64 *)(r1 + 0) ^= r2 +; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00] +; CHECK: w0 = 0 +define dso_local i32 @test_atomic_xor_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw xor i64* %p, i64 %v seq_cst + ret i32 0 +} + +; CHECK-LABEL: test_atomic_and_64 +; CHECK: lock *(u64 *)(r1 + 0) &= r2 +; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00] +; CHECK: w0 = 0 +define dso_local i32 @test_atomic_and_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw and i64* %p, i64 %v seq_cst + ret i32 0 +} + +; CHECK-LABEL: test_atomic_or_64 +; CHECK: lock *(u64 *)(r1 + 0) |= r2 +; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00] +; CHECK: w0 = 0 +define dso_local i32 @test_atomic_or_64(i64* nocapture %p, i64 %v) local_unnamed_addr { +entry: + %0 = atomicrmw or i64* %p, i64 %v seq_cst + ret i32 0 +} Index: llvm/test/CodeGen/BPF/atomics.ll =================================================================== --- llvm/test/CodeGen/BPF/atomics.ll +++ llvm/test/CodeGen/BPF/atomics.ll @@ -1,8 +1,11 @@ ; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s +; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s ; CHECK-LABEL: test_load_add_32 ; CHECK: lock *(u32 *)(r1 + 0) += r2 ; CHECK: encoding: [0xc3,0x21 +; CHECK-V3: lock *(u32 *)(r1 + 0) += w2 +; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00] define void @test_load_add_32(i32* %p, i32 zeroext %v) { 
entry: atomicrmw add i32* %p, i32 %v seq_cst @@ -12,6 +15,8 @@ ; CHECK-LABEL: test_load_add_64 ; CHECK: lock *(u64 *)(r1 + 0) += r2 ; CHECK: encoding: [0xdb,0x21 +; CHECK-V3: lock *(u64 *)(r1 + 0) += r2 +; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00] define void @test_load_add_64(i64* %p, i64 zeroext %v) { entry: atomicrmw add i64* %p, i64 %v seq_cst Index: llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp =================================================================== --- llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -159,12 +159,18 @@ uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { + // For CMPXCHG instructions, output is implicitly in R0/W0, + // so memory operand starts from operand 0. + int MemOpStartIndex = 1, Opcode = MI.getOpcode(); + if (Opcode == BPF::CMPXCHGW32 || Opcode == BPF::CMPXCHGD) + MemOpStartIndex = 0; + uint64_t Encoding; - const MCOperand Op1 = MI.getOperand(1); + const MCOperand Op1 = MI.getOperand(MemOpStartIndex); assert(Op1.isReg() && "First operand is not register."); Encoding = MRI.getEncodingValue(Op1.getReg()); Encoding <<= 16; - MCOperand Op2 = MI.getOperand(2); + MCOperand Op2 = MI.getOperand(MemOpStartIndex + 1); assert(Op2.isImm() && "Second operand is not immediate."); Encoding |= Op2.getImm() & 0xffff; return Encoding; Index: llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp =================================================================== --- llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -58,7 +58,7 @@ BPF_MEM = 0x3, BPF_LEN = 0x4, BPF_MSH = 0x5, - BPF_XADD = 0x6 + BPF_ATOMIC = 0x6 }; BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) @@ -176,7 +176,7 @@ uint8_t InstMode = getInstMode(Insn); if ((InstClass == BPF_LDX || InstClass == BPF_STX) && getInstSize(Insn) 
!= BPF_DW && - (InstMode == BPF_MEM || InstMode == BPF_XADD) && + (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) && STI.getFeatureBits()[BPF::ALU32]) Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address, this, STI); Index: llvm/lib/Target/BPF/BPFMIChecking.cpp =================================================================== --- llvm/lib/Target/BPF/BPFMIChecking.cpp +++ llvm/lib/Target/BPF/BPFMIChecking.cpp @@ -41,7 +41,7 @@ // Initialize class variables. void initialize(MachineFunction &MFParm); - void checkingIllegalXADD(void); + bool processAtomicInsts(void); public: @@ -49,7 +49,7 @@ bool runOnMachineFunction(MachineFunction &MF) override { if (!skipFunction(MF.getFunction())) { initialize(MF); - checkingIllegalXADD(); + return processAtomicInsts(); } return false; } @@ -151,7 +151,7 @@ return false; } -void BPFMIPreEmitChecking::checkingIllegalXADD(void) { +bool BPFMIPreEmitChecking::processAtomicInsts(void) { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() != BPF::XADDW && @@ -172,7 +172,51 @@ } } - return; + // Check return values of atomic_fetch_and_{add,and,or,xor}. + // If the return is not used, the atomic_fetch_and_<op> instruction + // is replaced with atomic_<op> instruction. 
+ MachineInstr* ToErase = nullptr; + bool Changed = false; + const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD && + MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD && + MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD && + MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD) + continue; + + if (hasLiveDefs(MI, TRI)) + continue; + + LLVM_DEBUG(dbgs() << "Transforming "; MI.dump()); + unsigned newOpcode; + switch(MI.getOpcode()) { + case BPF::XFADDW32: newOpcode = BPF::XADDW32; break; + case BPF::XFADDD: newOpcode = BPF::XADDD; break; + case BPF::XFANDW32: newOpcode = BPF::XANDW32; break; + case BPF::XFANDD: newOpcode = BPF::XANDD; break; + case BPF::XFXORW32: newOpcode = BPF::XXORW32; break; + case BPF::XFXORD: newOpcode = BPF::XXORD; break; + case BPF::XFORW32: newOpcode = BPF::XORW32; break; + default: newOpcode = BPF::XORD; break; + } + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode)) + .add(MI.getOperand(0)).add(MI.getOperand(1)) + .add(MI.getOperand(2)).add(MI.getOperand(3)); + + ToErase = &MI; + Changed = true; + } + } + + return Changed; } } // end default namespace Index: llvm/lib/Target/BPF/BPFInstrInfo.td =================================================================== --- llvm/lib/Target/BPF/BPFInstrInfo.td +++ llvm/lib/Target/BPF/BPFInstrInfo.td @@ -617,9 +617,9 @@ def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>; } -// Atomics +// Atomic XADD for BPFNoALU32 class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> - : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value, + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, (outs GPR:$dst), (ins MEMri:$addr, GPR:$val), "lock *("#OpcodeStr#" *)($addr) += $val", @@ -630,14 +630,88 @@ let Inst{51-48} = 
addr{19-16}; // base reg let Inst{55-52} = dst; let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_ADD.Value; let BPFClass = BPF_STX; } -class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> - : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value, +let Constraints = "$dst = $val" in { + let Predicates = [BPFNoALU32] in { + def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>; + } +} + +// Atomic add, and, or, xor +class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr> + : TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value, + (outs GPR:$dst), + (ins MEMri:$addr, GPR:$val), + "lock *(u64 *)($addr) " #Opstr# "= $val", + []> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let BPFClass = BPF_STX; +} + +class ATOMIC32_NOFETCH<BPFArithOp Opc, string Opstr> + : TYPE_LD_ST<BPF_ATOMIC.Value, BPF_W.Value, (outs GPR32:$dst), (ins MEMri:$addr, GPR32:$val), - "lock *("#OpcodeStr#" *)($addr) += $val", + "lock *(u32 *)($addr) " #Opstr# "= $val", + []> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let BPFClass = BPF_STX; +} + +let Constraints = "$dst = $val" in { + let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { + def XADDW32 : ATOMIC32_NOFETCH<BPF_ADD, "+">; + def XANDW32 : ATOMIC32_NOFETCH<BPF_AND, "&">; + def XORW32 : ATOMIC32_NOFETCH<BPF_OR, "|">; + def XXORW32 : ATOMIC32_NOFETCH<BPF_XOR, "^">; + } + + def XADDD : ATOMIC_NOFETCH<BPF_ADD, "+">; + def XANDD : ATOMIC_NOFETCH<BPF_AND, "&">; + def XORD : ATOMIC_NOFETCH<BPF_OR, "|">; + def XXORD : ATOMIC_NOFETCH<BPF_XOR, "^">; +} + +// Atomic Fetch-and-<add, and, or, xor> operations +class XFALU64<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr, + string OpcStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs GPR:$dst), + 
(ins MEMri:$addr, GPR:$val), + "$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)", + [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr, + string OpcStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs GPR32:$dst), + (ins MEMri:$addr, GPR32:$val), + "$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)", [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> { bits<4> dst; bits<20> addr; @@ -645,19 +719,117 @@ let Inst{51-48} = addr{19-16}; // base reg let Inst{55-52} = dst; let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = Opc.Value; + let Inst{3-0} = BPF_FETCH.Value; let BPFClass = BPF_STX; } let Constraints = "$dst = $val" in { - let Predicates = [BPFNoALU32] in { - def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>; + let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { + def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_32>; + def XFANDW32 : XFALU32<BPF_W, BPF_AND, "u32", "and", atomic_load_and_32>; + def XFORW32 : XFALU32<BPF_W, BPF_OR, "u32", "or", atomic_load_or_32>; + def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor", atomic_load_xor_32>; } + def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_64>; + def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and", atomic_load_and_64>; + def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or", atomic_load_or_64>; + def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor", atomic_load_xor_64>; +} + +// atomic_load_sub can be represented as a neg followed +// by an atomic_load_add. 
+def : Pat<(atomic_load_sub_32 ADDRri:$addr, GPR32:$val), + (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>; +def : Pat<(atomic_load_sub_64 ADDRri:$addr, GPR:$val), + (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>; + +// Atomic Exchange +class XCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs GPR:$dst), + (ins MEMri:$addr, GPR:$val), + "$dst = xchg_"#OpcodeStr#"($addr, $val)", + [(set GPR:$dst, (OpNode ADDRri:$addr,GPR:$val))]> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_XCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class XCHG32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs GPR32:$dst), + (ins MEMri:$addr, GPR32:$val), + "$dst = xchg32_"#OpcodeStr#"($addr, $val)", + [(set GPR32:$dst, (OpNode ADDRri:$addr,GPR32:$val))]> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = dst; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_XCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +let Constraints = "$dst = $val" in { let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { - def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>; + def XCHGW32 : XCHG32<BPF_W, "32", atomic_swap_32>; } - def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>; + def XCHGD : XCHG<BPF_DW, "64", atomic_swap_64>; +} + +// Compare-And-Exchange +class CMPXCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs), + (ins MEMri:$addr, GPR:$new), + "r0 = cmpxchg_"#OpcodeStr#"($addr, r0, $new)", + [(set R0, (OpNode ADDRri:$addr, R0, GPR:$new))]> { + bits<4> new; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = new; + let 
Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_CMPXCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +class CMPXCHG32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> + : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value, + (outs), + (ins MEMri:$addr, GPR32:$new), + "w0 = cmpxchg32_"#OpcodeStr#"($addr, w0, $new)", + [(set W0, (OpNode ADDRri:$addr, W0, GPR32:$new))]> { + bits<4> new; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = new; + let Inst{47-32} = addr{15-0}; // offset + let Inst{7-4} = BPF_CMPXCHG.Value; + let Inst{3-0} = BPF_FETCH.Value; + let BPFClass = BPF_STX; +} + +let Predicates = [BPFHasALU32], Defs = [W0], Uses = [W0], + DecoderNamespace = "BPFALU32" in { + def CMPXCHGW32 : CMPXCHG32<BPF_W, "32", atomic_cmp_swap_32>; +} + +let Defs = [R0], Uses = [R0] in { + def CMPXCHGD : CMPXCHG<BPF_DW, "64", atomic_cmp_swap_64>; } // bswap16, bswap32, bswap64 Index: llvm/lib/Target/BPF/BPFInstrFormats.td =================================================================== --- llvm/lib/Target/BPF/BPFInstrFormats.td +++ llvm/lib/Target/BPF/BPFInstrFormats.td @@ -44,6 +44,9 @@ def BPF_ARSH : BPFArithOp<0xc>; def BPF_END : BPFArithOp<0xd>; +def BPF_XCHG : BPFArithOp<0xe>; +def BPF_CMPXCHG : BPFArithOp<0xf>; + class BPFEndDir<bits<1> val> { bits<1> Value = val; } @@ -86,7 +89,13 @@ def BPF_ABS : BPFModeModifer<0x1>; def BPF_IND : BPFModeModifer<0x2>; def BPF_MEM : BPFModeModifer<0x3>; -def BPF_XADD : BPFModeModifer<0x6>; +def BPF_ATOMIC : BPFModeModifer<0x6>; + +class BPFAtomicFlag<bits<4> val> { + bits<4> Value = val; +} + +def BPF_FETCH : BPFAtomicFlag<0x1>; class InstBPF<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits