https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/154203
>From cc9d2d9e4923bad0d904d251fa116ad388b0b6ac Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 18 Aug 2025 15:59:46 -0500 Subject: [PATCH 1/6] [Clang] Support generic bit counting builtins on fixed boolean vectors Summary: Boolean vectors as implemented in clang can be bit-cast to an integer that is rounded up to the next primitive-sized integer. Users can do this themselves, but since the bit counting builtins are very likely to be used with bitmasks like this, and the generic forms are expected to be generic, it seems reasonable to handle this case directly. --- clang/docs/LanguageExtensions.rst | 4 +- clang/docs/ReleaseNotes.rst | 4 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 46 +- clang/lib/AST/ExprConstant.cpp | 42 +- clang/lib/CodeGen/CGBuiltin.cpp | 28 +- clang/lib/Sema/SemaChecking.cpp | 4 +- clang/test/AST/ByteCode/builtin-functions.cpp | 4 + clang/test/CodeGen/builtins.c | 423 ++++++++++-------- 8 files changed, 346 insertions(+), 209 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 3c6c97bb1fa10..40f8fc9190f94 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -4182,7 +4182,7 @@ builtin, the mangler emits their usual pattern without any special treatment. ----------------------- ``__builtin_popcountg`` returns the number of 1 bits in the argument. The -argument can be of any unsigned integer type. +argument can be of any unsigned integer type or fixed boolean vector. **Syntax**: @@ -4214,7 +4214,7 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``. ``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of leading (respectively trailing) 0 bits in the first argument. The first argument -can be of any unsigned integer type. +can be of any unsigned integer type or fixed boolean vector. If the first argument is 0 and an optional second argument of ``int`` type is provided, then the second argument is returned. 
If the first argument is 0, but diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c32102d102cd3..f0732deec23c3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -143,10 +143,12 @@ Non-comprehensive list of changes in this release - Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional memory loads from vectors. Binds to the LLVM intrinsic of the same name. +- The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg`` + functions now accept fixed-size boolean vectors. + - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics`` features has been deprecated, and is restricted to the arm64e target only. The correct method to check for these features is to test for the ``__PTRAUTH__`` - macro. - Added a new builtin, ``__builtin_dedup_pack``, to remove duplicate types from a parameter pack. This feature is particularly useful in template metaprogramming for normalizing type lists. diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 2cbebaf7b630e..5039d9950a4a4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -141,6 +141,22 @@ static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC, S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); } +static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) { + assert(Val.getFieldDesc()->isPrimitiveArray() && + Val.getFieldDesc()->getElemQualType()->isBooleanType() && + "Not a boolean vector"); + unsigned NumElts = Val.getNumElems(); + + // Each element is one bit, so create an integer with NumElts bits. 
+ llvm::APSInt Result(NumElts, 0); + for (unsigned I = 0; I < NumElts; ++I) { + if (Val.elem<bool>(I)) + Result.setBit(I); + } + + return Result; +} + static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -638,8 +654,14 @@ static bool interp__builtin_abs(InterpState &S, CodePtr OpPC, static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { - PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); - APSInt Val = popToAPSInt(S.Stk, ArgT); + APSInt Val; + if (Call->getArg(0)->getType()->isExtVectorBoolType()) { + const Pointer &Arg = S.Stk.pop<Pointer>(); + Val = convertBoolVectorToInt(Arg); + } else { + PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); + Val = popToAPSInt(S.Stk, ArgT); + } pushInteger(S, Val.popcount(), Call->getType()); return true; } @@ -935,8 +957,14 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC, PrimType FallbackT = *S.getContext().classify(Call->getArg(1)); Fallback = popToAPSInt(S.Stk, FallbackT); } - PrimType ValT = *S.getContext().classify(Call->getArg(0)); - const APSInt &Val = popToAPSInt(S.Stk, ValT); + APSInt Val; + if (Call->getArg(0)->getType()->isExtVectorBoolType()) { + const Pointer &Arg = S.Stk.pop<Pointer>(); + Val = convertBoolVectorToInt(Arg); + } else { + PrimType ValT = *S.getContext().classify(Call->getArg(0)); + Val = popToAPSInt(S.Stk, ValT); + } // When the argument is 0, the result of GCC builtins is undefined, whereas // for Microsoft intrinsics, the result is the bit-width of the argument. 
@@ -966,8 +994,14 @@ static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC, PrimType FallbackT = *S.getContext().classify(Call->getArg(1)); Fallback = popToAPSInt(S.Stk, FallbackT); } - PrimType ValT = *S.getContext().classify(Call->getArg(0)); - const APSInt &Val = popToAPSInt(S.Stk, ValT); + APSInt Val; + if (Call->getArg(0)->getType()->isExtVectorBoolType()) { + const Pointer &Arg = S.Stk.pop<Pointer>(); + Val = convertBoolVectorToInt(Arg); + } else { + PrimType ValT = *S.getContext().classify(Call->getArg(0)); + Val = popToAPSInt(S.Stk, ValT); + } if (Val == 0) { if (Fallback) { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a03e64fcffde2..e0d1b03dd6d2f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11314,6 +11314,24 @@ static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) { return VectorExprEvaluator(Info, Result).Visit(E); } +static llvm::APInt ConvertBoolVectorToInt(const APValue &Val) { + assert(Val.isVector() && "expected vector APValue"); + unsigned NumElts = Val.getVectorLength(); + + // Each element is one bit, so create an integer with NumElts bits. 
+ llvm::APInt Result(NumElts, 0); + + for (unsigned I = 0; I < NumElts; ++I) { + const APValue &Elt = Val.getVectorElt(I); + assert(Elt.isInt() && "expected integer element in bool vector"); + + if (Elt.getInt().getBoolValue()) + Result.setBit(I); + } + + return Result; +} + bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { const VectorType *VTy = E->getType()->castAs<VectorType>(); unsigned NElts = VTy->getNumElements(); @@ -13456,8 +13474,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__lzcnt: case Builtin::BI__lzcnt64: { APSInt Val; - if (!EvaluateInteger(E->getArg(0), Val, Info)) + if (E->getArg(0)->getType()->isExtVectorBoolType()) { + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + Val = ConvertBoolVectorToInt(Vec); + } else if (!EvaluateInteger(E->getArg(0), Val, Info)) { return false; + } std::optional<APSInt> Fallback; if ((BuiltinOp == Builtin::BI__builtin_clzg || @@ -13542,8 +13566,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_ctzg: case Builtin::BI__builtin_elementwise_cttz: { APSInt Val; - if (!EvaluateInteger(E->getArg(0), Val, Info)) + if (E->getArg(0)->getType()->isExtVectorBoolType()) { + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + Val = ConvertBoolVectorToInt(Vec); + } else if (!EvaluateInteger(E->getArg(0), Val, Info)) { return false; + } std::optional<APSInt> Fallback; if ((BuiltinOp == Builtin::BI__builtin_ctzg || @@ -13758,8 +13788,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__popcnt: case Builtin::BI__popcnt64: { APSInt Val; - if (!EvaluateInteger(E->getArg(0), Val, Info)) + if (E->getArg(0)->getType()->isExtVectorBoolType()) { + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + Val = ConvertBoolVectorToInt(Vec); + } else if (!EvaluateInteger(E->getArg(0), Val, Info)) { return false; + } return 
Success(Val.popcount(), E); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d9cc37d123fb4..b98218c36161d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1693,6 +1693,26 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) { llvm_unreachable("invalid interlocking"); } +static llvm::Value *EmitBitCountExpr(CodeGenFunction &CGF, const Expr *E) { + llvm::Value *ArgValue = CGF.EmitScalarExpr(E); + llvm::Type *ArgType = ArgValue->getType(); + + // Boolean vectors can be cast directly to their bitfield representation. We + // intentionally do not round up to the next power of two size and let LLVM + // handle the trailing bits. + if (auto *VT = dyn_cast<llvm::FixedVectorType>(ArgType); + VT && VT->getElementType()->isIntegerTy(1)) { + llvm::Type *StorageType = + llvm::Type::getIntNTy(CGF.getLLVMContext(), VT->getNumElements()); + ArgValue = CGF.emitBoolVecConversion( + ArgValue, StorageType->getPrimitiveSizeInBits(), "insertvec"); + ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType); + ArgType = ArgValue->getType(); + } + + return ArgValue; +} + /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of /// bits and a bit position and read and optionally modify the bit at that /// position. The position index can be arbitrarily large, i.e. it can be larger @@ -2020,7 +2040,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && "Unsupported builtin check kind"); - Value *ArgValue = EmitScalarExpr(E); + Value *ArgValue = EmitBitCountExpr(*this, E); if (!SanOpts.has(SanitizerKind::Builtin)) return ArgValue; @@ -3334,7 +3354,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, E->getNumArgs() > 1; Value *ArgValue = - HasFallback ? EmitScalarExpr(E->getArg(0)) + HasFallback ? 
EmitBitCountExpr(*this, E->getArg(0)) : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); @@ -3371,7 +3391,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, E->getNumArgs() > 1; Value *ArgValue = - HasFallback ? EmitScalarExpr(E->getArg(0)) + HasFallback ? EmitBitCountExpr(*this, E->getArg(0)) : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); @@ -3456,7 +3476,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: case Builtin::BI__builtin_popcountg: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitBitCountExpr(*this, E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2944c1a09b32c..e343d77503cc2 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) { QualType ArgTy = Arg->getType(); - if (!ArgTy->isUnsignedIntegerType()) { + if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) { S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0 << ArgTy; @@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) { QualType Arg0Ty = Arg0->getType(); - if (!Arg0Ty->isUnsignedIntegerType()) { + if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) { S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0 << Arg0Ty; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 
3277ef65a880b..f47bc49d9a1a8 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -454,6 +454,7 @@ namespace SourceLocation { } #define BITSIZE(x) (sizeof(x) * 8) +constexpr bool __attribute__((ext_vector_type(4))) v4b{}; namespace popcount { static_assert(__builtin_popcount(~0u) == __CHAR_BIT__ * sizeof(unsigned int), ""); static_assert(__builtin_popcount(0) == 0, ""); @@ -471,6 +472,7 @@ namespace popcount { static_assert(__builtin_popcountg(0ul) == 0, ""); static_assert(__builtin_popcountg(~0ull) == __CHAR_BIT__ * sizeof(unsigned long long), ""); static_assert(__builtin_popcountg(0ull) == 0, ""); + static_assert(__builtin_popcountg(v4b) == 0, ""); #ifdef __SIZEOF_INT128__ static_assert(__builtin_popcountg(~(unsigned __int128)0) == __CHAR_BIT__ * sizeof(unsigned __int128), ""); static_assert(__builtin_popcountg((unsigned __int128)0) == 0, ""); @@ -743,6 +745,7 @@ namespace clz { char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1]; char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1]; #endif + char clz64[__builtin_clzg(v4b, 0) == 0 ? 1 : -1]; } namespace ctz { @@ -813,6 +816,7 @@ namespace ctz { char ctz62[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1)) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; char ctz63[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1), 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; #endif + char ctz64[__builtin_ctzg(v4b, 0) == 
1 : -1]; } namespace bswap { diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c index aa9965b815983..7ad143ed165c8 100644 --- a/clang/test/CodeGen/builtins.c +++ b/clang/test/CodeGen/builtins.c @@ -991,247 +991,288 @@ void test_builtin_os_log_long_double(void *buf, long double ld) { void test_builtin_popcountg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, - unsigned _BitInt(128) ubi128) { + unsigned _BitInt(128) ubi128, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int pop; - pop = __builtin_popcountg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1) - // CHECK-NEXT: %cast = zext i8 %2 to i32 + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.ctpop.i8(i8 %2) + // CHECK-NEXT: %cast = zext i8 %3 to i32 // CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4 + pop = __builtin_popcountg(uc); + // CHECK: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.ctpop.i16(i16 %4) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4 pop = __builtin_popcountg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3) - // CHECK-NEXT: %cast1 = zext i16 %4 to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4 + // CHECK: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.ctpop.i32(i32 %6) + // CHECK-NEXT: store volatile i32 %7, ptr %pop, align 4 pop = __builtin_popcountg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.ctpop.i32(i32 %5) - // CHECK-NEXT: store volatile i32 %6, ptr %pop, align 4 + // CHECK: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.ctpop.i64(i64 %8) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store 
volatile i32 %cast3, ptr %pop, align 4 pop = __builtin_popcountg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.ctpop.i64(i64 %7) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4 + // CHECK: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.ctpop.i64(i64 %10) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4 pop = __builtin_popcountg(ull); - // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %10 = call i64 @llvm.ctpop.i64(i64 %9) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4 + // CHECK: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.ctpop.i128(i128 %12) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4 pop = __builtin_popcountg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.ctpop.i128(i128 %11) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4 + // CHECK: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 @llvm.ctpop.i128(i128 %14) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %pop, align 4 pop = __builtin_popcountg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.ctpop.i128(i128 %13) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4 - // CHECK-NEXT: ret void + // CHECK: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.ctpop.i8(i8 %17) + // CHECK-NEXT: 
%cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %pop, align 4 + pop = __builtin_popcountg(vb8); } // CHECK-LABEL: define{{.*}} void @test_builtin_clzg void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, unsigned _BitInt(128) ubi128, - signed char sc, short s, int i) { + signed char sc, short s, int i, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int lz; + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.ctlz.i8(i8 %2, i1 true) + // CHECK-NEXT: %cast = zext i8 %3 to i32 + // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4 lz = __builtin_clzg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true) - // CHECK-NEXT: %cast = zext i8 %2 to i32 - // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4 + // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.ctlz.i16(i16 %4, i1 true) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4 lz = __builtin_clzg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true) - // CHECK-NEXT: %cast1 = zext i16 %4 to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4 + // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.ctlz.i32(i32 %6, i1 true) + // CHECK-NEXT: store volatile i32 %7, ptr %lz, align 4 lz = __builtin_clzg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.ctlz.i32(i32 %5, i1 true) - // CHECK-NEXT: store volatile i32 %6, ptr %lz, align 4 + // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.ctlz.i64(i64 %8, i1 true) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, 
align 4 lz = __builtin_clzg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.ctlz.i64(i64 %7, i1 true) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4 + // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.ctlz.i64(i64 %10, i1 true) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4 lz = __builtin_clzg(ull); - // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %10 = call i64 @llvm.ctlz.i64(i64 %9, i1 true) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4 + // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.ctlz.i128(i128 %12, i1 true) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4 lz = __builtin_clzg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.ctlz.i128(i128 %11, i1 true) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4 + // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 @llvm.ctlz.i128(i128 %14, i1 true) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %lz, align 4 lz = __builtin_clzg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.ctlz.i128(i128 %13, i1 true) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4 + // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.ctlz.i8(i8 %17, i1 true) + // CHECK-NEXT: 
%cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %lz, align 4 + lz = __builtin_clzg(vb8); + // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %20 = call i8 @llvm.ctlz.i8(i8 %19, i1 true) + // CHECK-NEXT: %cast9 = zext i8 %20 to i32 + // CHECK-NEXT: %iszero = icmp eq i8 %19, 0 + // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1 + // CHECK-NEXT: %conv = sext i8 %21 to i32 + // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast9 + // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4 lz = __builtin_clzg(uc, sc); - // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true) - // CHECK-NEXT: %cast6 = zext i8 %16 to i32 - // CHECK-NEXT: %iszero = icmp eq i8 %15, 0 - // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1 - // CHECK-NEXT: %conv = sext i8 %17 to i32 - // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast6 - // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4 + // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %23 = call i16 @llvm.ctlz.i16(i16 %22, i1 true) + // CHECK-NEXT: %cast10 = zext i16 %23 to i32 + // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0 + // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %conv12 = zext i8 %24 to i32 + // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10 + // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4 lz = __builtin_clzg(us, uc); - // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true) - // CHECK-NEXT: %cast7 = zext i16 %19 to i32 - // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0 - // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %conv9 = zext i8 %20 to i32 - // CHECK-NEXT: %clzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7 - // CHECK-NEXT: store volatile i32 %clzg10, ptr %lz, align 4 + // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4 + // 
CHECK-NEXT: %26 = call i32 @llvm.ctlz.i32(i32 %25, i1 true) + // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0 + // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2 + // CHECK-NEXT: %conv15 = sext i16 %27 to i32 + // CHECK-NEXT: %clzg16 = select i1 %iszero14, i32 %conv15, i32 %26 + // CHECK-NEXT: store volatile i32 %clzg16, ptr %lz, align 4 lz = __builtin_clzg(ui, s); - // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %22 = call i32 @llvm.ctlz.i32(i32 %21, i1 true) - // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0 - // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2 - // CHECK-NEXT: %conv12 = sext i16 %23 to i32 - // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %22 - // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4 + // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %29 = call i64 @llvm.ctlz.i64(i64 %28, i1 true) + // CHECK-NEXT: %cast17 = trunc i64 %29 to i32 + // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0 + // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %conv19 = zext i16 %30 to i32 + // CHECK-NEXT: %clzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17 + // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4 lz = __builtin_clzg(ul, us); - // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %25 = call i64 @llvm.ctlz.i64(i64 %24, i1 true) - // CHECK-NEXT: %cast14 = trunc i64 %25 to i32 - // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0 - // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %conv16 = zext i16 %26 to i32 - // CHECK-NEXT: %clzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14 - // CHECK-NEXT: store volatile i32 %clzg17, ptr %lz, align 4 + // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %32 = call i64 @llvm.ctlz.i64(i64 %31, i1 true) + // CHECK-NEXT: %cast21 = trunc i64 %32 to i32 + // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0 + // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg23 
= select i1 %iszero22, i32 %33, i32 %cast21 + // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4 lz = __builtin_clzg(ull, i); - // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %28 = call i64 @llvm.ctlz.i64(i64 %27, i1 true) - // CHECK-NEXT: %cast18 = trunc i64 %28 to i32 - // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0 - // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg20 = select i1 %iszero19, i32 %29, i32 %cast18 - // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4 + // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %35 = call i128 @llvm.ctlz.i128(i128 %34, i1 true) + // CHECK-NEXT: %cast24 = trunc i128 %35 to i32 + // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0 + // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %36, i32 %cast24 + // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4 lz = __builtin_clzg(ui128, i); - // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %31 = call i128 @llvm.ctlz.i128(i128 %30, i1 true) - // CHECK-NEXT: %cast21 = trunc i128 %31 to i32 - // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0 - // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %32, i32 %cast21 - // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4 + // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %38 = call i128 @llvm.ctlz.i128(i128 %37, i1 true) + // CHECK-NEXT: %cast27 = trunc i128 %38 to i32 + // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0 + // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg29 = select i1 %iszero28, i32 %39, i32 %cast27 + // CHECK-NEXT: store volatile i32 %clzg29, ptr %lz, align 4 lz = __builtin_clzg(ubi128, i); - // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %34 = call i128 @llvm.ctlz.i128(i128 %33, i1 true) - // CHECK-NEXT: %cast24 = trunc i128 
%34 to i32 - // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0 - // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %35, i32 %cast24 - // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4 - // CHECK-NEXT: ret void + // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1> + // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8 + // CHECK-NEXT: %42 = call i8 @llvm.ctlz.i8(i8 %41, i1 true) + // CHECK-NEXT: %cast31 = zext i8 %42 to i32 + // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0 + // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg33 = select i1 %iszero32, i32 %43, i32 %cast31 + // CHECK-NEXT: store volatile i32 %clzg33, ptr %lz, align 4 + lz = __builtin_clzg(vb8, i); } // CHECK-LABEL: define{{.*}} void @test_builtin_ctzg void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, unsigned _BitInt(128) ubi128, - signed char sc, short s, int i) { + signed char sc, short s, int i, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int tz; - tz = __builtin_ctzg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.cttz.i8(i8 %1, i1 true) - // CHECK-NEXT: %cast = zext i8 %2 to i32 + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.cttz.i8(i8 %2, i1 true) + // CHECK-NEXT: %cast = zext i8 %3 to i32 // CHECK-NEXT: store volatile i32 %cast, ptr %tz, align 4 + tz = __builtin_ctzg(uc); + // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.cttz.i16(i16 %4, i1 true) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4 tz = __builtin_ctzg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.cttz.i16(i16 %3, i1 true) - // CHECK-NEXT: %cast1 = zext i16 %4 
to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %tz, align 4 + // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.cttz.i32(i32 %6, i1 true) + // CHECK-NEXT: store volatile i32 %7, ptr %tz, align 4 tz = __builtin_ctzg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.cttz.i32(i32 %5, i1 true) - // CHECK-NEXT: store volatile i32 %6, ptr %tz, align 4 + // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.cttz.i64(i64 %8, i1 true) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4 tz = __builtin_ctzg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.cttz.i64(i64 %7, i1 true) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4 + // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.cttz.i64(i64 %10, i1 true) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4 tz = __builtin_ctzg(ull); - // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %10 = call i64 @llvm.cttz.i64(i64 %9, i1 true) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4 + // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.cttz.i128(i128 %12, i1 true) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4 tz = __builtin_ctzg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.cttz.i128(i128 %11, i1 true) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4 + // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 
@llvm.cttz.i128(i128 %14, i1 true) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %tz, align 4 tz = __builtin_ctzg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.cttz.i128(i128 %13, i1 true) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4 - tz = __builtin_ctzg(uc, sc); - // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %16 = call i8 @llvm.cttz.i8(i8 %15, i1 true) - // CHECK-NEXT: %cast6 = zext i8 %16 to i32 - // CHECK-NEXT: %iszero = icmp eq i8 %15, 0 - // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1 - // CHECK-NEXT: %conv = sext i8 %17 to i32 - // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast6 + // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.cttz.i8(i8 %17, i1 true) + // CHECK-NEXT: %cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %tz, align 4 + tz = __builtin_ctzg(vb8); + // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %20 = call i8 @llvm.cttz.i8(i8 %19, i1 true) + // CHECK-NEXT: %cast9 = zext i8 %20 to i32 + // CHECK-NEXT: %iszero = icmp eq i8 %19, 0 + // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1 + // CHECK-NEXT: %conv = sext i8 %21 to i32 + // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast9 // CHECK-NEXT: store volatile i32 %ctzg, ptr %tz, align 4 + tz = __builtin_ctzg(uc, sc); + // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %23 = call i16 @llvm.cttz.i16(i16 %22, i1 true) + // CHECK-NEXT: %cast10 = zext i16 %23 to i32 + // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0 + // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %conv12 = zext i8 %24 to i32 + // CHECK-NEXT: %ctzg13 = select i1 %iszero11, 
i32 %conv12, i32 %cast10 + // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4 tz = __builtin_ctzg(us, uc); - // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %19 = call i16 @llvm.cttz.i16(i16 %18, i1 true) - // CHECK-NEXT: %cast7 = zext i16 %19 to i32 - // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0 - // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %conv9 = zext i8 %20 to i32 - // CHECK-NEXT: %ctzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7 - // CHECK-NEXT: store volatile i32 %ctzg10, ptr %tz, align 4 + // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %26 = call i32 @llvm.cttz.i32(i32 %25, i1 true) + // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0 + // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2 + // CHECK-NEXT: %conv15 = sext i16 %27 to i32 + // CHECK-NEXT: %ctzg16 = select i1 %iszero14, i32 %conv15, i32 %26 + // CHECK-NEXT: store volatile i32 %ctzg16, ptr %tz, align 4 tz = __builtin_ctzg(ui, s); - // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %22 = call i32 @llvm.cttz.i32(i32 %21, i1 true) - // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0 - // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2 - // CHECK-NEXT: %conv12 = sext i16 %23 to i32 - // CHECK-NEXT: %ctzg13 = select i1 %iszero11, i32 %conv12, i32 %22 - // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4 + // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %29 = call i64 @llvm.cttz.i64(i64 %28, i1 true) + // CHECK-NEXT: %cast17 = trunc i64 %29 to i32 + // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0 + // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %conv19 = zext i16 %30 to i32 + // CHECK-NEXT: %ctzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17 + // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4 tz = __builtin_ctzg(ul, us); - // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %25 = call i64 @llvm.cttz.i64(i64 %24, i1 true) - // 
CHECK-NEXT: %cast14 = trunc i64 %25 to i32 - // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0 - // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %conv16 = zext i16 %26 to i32 - // CHECK-NEXT: %ctzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14 - // CHECK-NEXT: store volatile i32 %ctzg17, ptr %tz, align 4 + // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %32 = call i64 @llvm.cttz.i64(i64 %31, i1 true) + // CHECK-NEXT: %cast21 = trunc i64 %32 to i32 + // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0 + // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %33, i32 %cast21 + // CHECK-NEXT: store volatile i32 %ctzg23, ptr %tz, align 4 tz = __builtin_ctzg(ull, i); - // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %28 = call i64 @llvm.cttz.i64(i64 %27, i1 true) - // CHECK-NEXT: %cast18 = trunc i64 %28 to i32 - // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0 - // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg20 = select i1 %iszero19, i32 %29, i32 %cast18 - // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4 + // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %35 = call i128 @llvm.cttz.i128(i128 %34, i1 true) + // CHECK-NEXT: %cast24 = trunc i128 %35 to i32 + // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0 + // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %36, i32 %cast24 + // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4 tz = __builtin_ctzg(ui128, i); - // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %31 = call i128 @llvm.cttz.i128(i128 %30, i1 true) - // CHECK-NEXT: %cast21 = trunc i128 %31 to i32 - // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0 - // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %32, i32 %cast21 - // CHECK-NEXT: store volatile i32 %ctzg23, 
ptr %tz, align 4 + // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %38 = call i128 @llvm.cttz.i128(i128 %37, i1 true) + // CHECK-NEXT: %cast27 = trunc i128 %38 to i32 + // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0 + // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg29 = select i1 %iszero28, i32 %39, i32 %cast27 + // CHECK-NEXT: store volatile i32 %ctzg29, ptr %tz, align 4 tz = __builtin_ctzg(ubi128, i); - // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %34 = call i128 @llvm.cttz.i128(i128 %33, i1 true) - // CHECK-NEXT: %cast24 = trunc i128 %34 to i32 - // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0 - // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %35, i32 %cast24 - // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4 - // CHECK-NEXT: ret void + // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1> + // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8 + // CHECK-NEXT: %42 = call i8 @llvm.cttz.i8(i8 %41, i1 true) + // CHECK-NEXT: %cast31 = zext i8 %42 to i32 + // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0 + // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg33 = select i1 %iszero32, i32 %43, i32 %cast31 + // CHECK-NEXT: store volatile i32 %ctzg33, ptr %tz, align 4 + tz = __builtin_ctzg(vb8, i); } #endif >From 65bdc4c536bdf9588d357a8bb7281b96fffc6fb6 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 21 Aug 2025 16:13:39 -0500 Subject: [PATCH 2/6] docs --- clang/docs/LanguageExtensions.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 40f8fc9190f94..d6584121b148f 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -4214,7 +4214,9 @@ such as ``unsigned __int128`` and C23 ``unsigned 
_BitInt(N)``. ``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of leading (respectively trailing) 0 bits in the first argument. The first argument -can be of any unsigned integer type or fixed boolean vector. +can be of any unsigned integer type or fixed boolean vector. Boolean vectors +behave like a bit field where the least significant bits are trailing and the +most significant bits are leading. If the first argument is 0 and an optional second argument of ``int`` type is provided, then the second argument is returned. If the first argument is 0, but >From b6629161080ed60f17b9cd16845ff9c372897062 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 21 Aug 2025 16:24:42 -0500 Subject: [PATCH 3/6] simplify codegen --- clang/lib/CodeGen/CGBuiltin.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b98218c36161d..0979104e945a8 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1704,10 +1704,7 @@ static llvm::Value *EmitBitCountExpr(CodeGenFunction &CGF, const Expr *E) { VT && VT->getElementType()->isIntegerTy(1)) { llvm::Type *StorageType = llvm::Type::getIntNTy(CGF.getLLVMContext(), VT->getNumElements()); - ArgValue = CGF.emitBoolVecConversion( - ArgValue, StorageType->getPrimitiveSizeInBits(), "insertvec"); ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType); - ArgType = ArgValue->getType(); } return ArgValue; >From 10f6bb2f6a0f86796daa46f60e5659ad751de4a6 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 21 Aug 2025 16:26:47 -0500 Subject: [PATCH 4/6] Improve docs --- clang/docs/LanguageExtensions.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d6584121b148f..1299582b2f5ea 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ 
-4214,9 +4214,13 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``. ``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of leading (respectively trailing) 0 bits in the first argument. The first argument -can be of any unsigned integer type or fixed boolean vector. Boolean vectors -behave like a bit field where the least significant bits are trailing and the -most significant bits are leading. +can be of any unsigned integer type or fixed boolean vector. + +For boolean vectors, these builtins interpret the vector like a bit-field where +the ith element of the vector is bit i of the bit-field, counting from the +least significant end. ``__builtin_clzg`` returns the number of zero elements at +the end of the vector, while ``__builtin_ctzg`` returns the number of zero +elements at the start of the vector. If the first argument is 0 and an optional second argument of ``int`` type is provided, then the second argument is returned. If the first argument is 0, but >From f29abf9ba336a9eb0bcceb4375cf86bfabeb3e5a Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 21 Aug 2025 16:29:26 -0500 Subject: [PATCH 5/6] Fix missing line --- clang/docs/ReleaseNotes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f0732deec23c3..3e495d8dfa136 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -149,6 +149,7 @@ Non-comprehensive list of changes in this release - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics`` features has been deprecated, and is restricted to the arm64e target only. The correct method to check for these features is to test for the ``__PTRAUTH__`` + macro. - Added a new builtin, ``__builtin_dedup_pack``, to remove duplicate types from a parameter pack. This feature is particularly useful in template metaprogramming for normalizing type lists. 
>From acd708cf5e8a8b57126df0fcbe3cca35654b2038 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Thu, 21 Aug 2025 22:13:12 -0500 Subject: [PATCH 6/6] comments --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5039d9950a4a4..79040d45cb010 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -145,11 +145,11 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) { assert(Val.getFieldDesc()->isPrimitiveArray() && Val.getFieldDesc()->getElemQualType()->isBooleanType() && "Not a boolean vector"); - unsigned NumElts = Val.getNumElems(); + unsigned NumElems = Val.getNumElems(); - // Each element is one bit, so create an integer with NumElts bits. + // Each element is one bit, so create an integer with NumElems bits. - llvm::APSInt Result(NumElts, 0); - for (unsigned I = 0; I < NumElts; ++I) { + llvm::APSInt Result(NumElems, 0); + for (unsigned I = 0; I != NumElems; ++I) { if (Val.elem<bool>(I)) Result.setBit(I); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits