vhscampos created this revision. Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls. Herald added projects: clang, LLVM.
Writing support for two ACLE functions: unsigned int __cls(uint32_t x) and unsigned int __clsl(unsigned long x). CLS stands for "Count Leading Sign bits". In AArch64, these two intrinsics can be translated into the 'cls' instruction directly. In AArch32, on the other hand, this functionality is achieved by implementing it in terms of clz (count leading zeros). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D69250 Files: clang/include/clang/Basic/BuiltinsAArch64.def clang/include/clang/Basic/BuiltinsARM.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/arm_acle.h clang/test/CodeGen/builtins-arm.c clang/test/CodeGen/builtins-arm64.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/test/CodeGen/AArch64/cls.ll llvm/test/CodeGen/ARM/cls.ll
Index: llvm/test/CodeGen/ARM/cls.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/cls.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=armv5 %s -o - | FileCheck %s + +; CHECK: eor [[T:r[0-9]+]], [[T]], [[T]], asr #31 +; CHECK-NEXT: mov [[C1:r[0-9]+]], #1 +; CHECK-NEXT: orr [[T]], [[C1]], [[T]], lsl #1 +; CHECK-NEXT: clz [[T]], [[T]] +define i32 @cls(i32 %t) { + %cls.i = call i32 @llvm.arm.cls(i32 %t) + ret i32 %cls.i +} + +declare i32 @llvm.arm.cls(i32) nounwind Index: llvm/test/CodeGen/AArch64/cls.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/cls.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s + +; @llvm.aarch64.cls must be directly translated into the 'cls' instruction + +; CHECK-LABEL: cls +; CHECK: cls [[REG:w[0-9]+]], [[REG]] +define i32 @cls(i32 %t) { + %cls.i = call i32 @llvm.aarch64.cls(i32 %t) + ret i32 %cls.i +} + +; CHECK-LABEL: cls64 +; CHECK: cls [[REG:x[0-9]+]], [[REG]] +define i32 @cls64(i64 %t) { + %cls.i = call i32 @llvm.aarch64.cls64(i64 %t) + ret i32 %cls.i +} + +declare i32 @llvm.aarch64.cls(i32) nounwind +declare i32 @llvm.aarch64.cls64(i64) nounwind Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3625,6 +3625,19 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } + case Intrinsic::arm_cls: { + const SDValue &Operand = Op.getOperand(1); + const EVT VTy = Op.getValueType(); + SDValue SRA = + DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); + SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); + SDValue SHL = + DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); + SDValue OR = + DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); + 
SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); + return Result; + } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1478,6 +1478,8 @@ def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), (i64 1))), (CLSXr GPR64:$Rn)>; +def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>; +def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>; // Unlike the other one operand instructions, the instructions with the "rev" // mnemonic do *not* just different in the size bit, but actually use different Index: llvm/include/llvm/IR/IntrinsicsARM.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsARM.td +++ llvm/include/llvm/IR/IntrinsicsARM.td @@ -787,4 +787,6 @@ [], [IntrReadMem, IntrWriteMem]>; +def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + } // end TargetPrefix Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -33,6 +33,9 @@ def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; +def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // HINT Index: clang/test/CodeGen/builtins-arm64.c =================================================================== --- clang/test/CodeGen/builtins-arm64.c +++ clang/test/CodeGen/builtins-arm64.c @@ -106,4 +106,16 @@ 
__builtin_arm_wsrp("1:2:3:4:5", v); } +unsigned int cls(uint32_t v) { + // CHECK: call i32 @llvm.aarch64.cls(i32 %v) + return __builtin_arm_cls(v); +} + +unsigned int clsl(unsigned long v) { + // CHECK-WIN: [[V64:%[^ ]+]] = zext i32 %v to i64 + // CHECK-WIN: call i32 @llvm.aarch64.cls64(i64 [[V64]] + // CHECK-LINUX: call i32 @llvm.aarch64.cls64(i64 %v) + return __builtin_arm_cls64(v); +} + // CHECK: ![[M0]] = !{!"1:2:3:4:5"} Index: clang/test/CodeGen/builtins-arm.c =================================================================== --- clang/test/CodeGen/builtins-arm.c +++ clang/test/CodeGen/builtins-arm.c @@ -256,6 +256,16 @@ __builtin_arm_wsrp("sysreg", v); } +unsigned int cls(uint32_t v) { + // CHECK: call i32 @llvm.arm.cls(i32 %v) + return __builtin_arm_cls(v); +} + +unsigned int clsl(unsigned long v) { + // CHECK: call i32 @llvm.arm.cls(i32 %v) + return __builtin_arm_cls(v); +} + // CHECK: ![[M0]] = !{!"cp1:2:c3:c4:5"} // CHECK: ![[M1]] = !{!"cp1:2:c3"} // CHECK: ![[M2]] = !{!"sysreg"} Index: clang/lib/Headers/arm_acle.h =================================================================== --- clang/lib/Headers/arm_acle.h +++ clang/lib/Headers/arm_acle.h @@ -139,6 +139,21 @@ return __builtin_clzll(__t); } +/* CLS */ +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__cls(uint32_t __t) { + return __builtin_arm_cls(__t); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__clsl(unsigned long __t) { +#if __SIZEOF_LONG__ == 4 + return __builtin_arm_cls(__t); +#else + return __builtin_arm_cls64(__t); +#endif +} + /* REV */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev(uint32_t __t) { Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -6052,6 +6052,11 @@ CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == 
ARM::BI__builtin_arm_cls) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); + } + if (BuiltinID == ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -7000,6 +7005,17 @@ CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_cls) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, + "cls"); + } + if (BuiltinID == AArch64::BI__builtin_arm_cls64) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, + "cls"); + } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { assert((getContext().getTypeSize(E->getType()) == 32) && "__jcvt of unusual size!"); Index: clang/include/clang/Basic/BuiltinsARM.def =================================================================== --- clang/include/clang/Basic/BuiltinsARM.def +++ clang/include/clang/Basic/BuiltinsARM.def @@ -115,6 +115,7 @@ // Bit manipulation BUILTIN(__builtin_arm_rbit, "UiUi", "nc") +BUILTIN(__builtin_arm_cls, "UiZUi", "nc") // Store and load exclusive BUILTIN(__builtin_arm_ldrexd, "LLUiv*", "") Index: clang/include/clang/Basic/BuiltinsAArch64.def =================================================================== --- clang/include/clang/Basic/BuiltinsAArch64.def +++ clang/include/clang/Basic/BuiltinsAArch64.def @@ -33,6 +33,8 @@ // Bit manipulation BUILTIN(__builtin_arm_rbit, "UiUi", "nc") BUILTIN(__builtin_arm_rbit64, "WUiWUi", "nc") +BUILTIN(__builtin_arm_cls, "UiZUi", "nc") +BUILTIN(__builtin_arm_cls64, "UiWUi", "nc") // HINT BUILTIN(__builtin_arm_nop, "v", "")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits